Data-Pond-0.005000755001750001750 013133614726 13226 5ustar00zeframzefram000000000000Data-Pond-0.005/.gitignore000444001750001750 22413133614716 15330 0ustar00zeframzefram000000000000/Build /Makefile /_build /blib /META.json /META.yml /MYMETA.json /MYMETA.yml /Makefile.PL /SIGNATURE /Data-Pond-* /lib/Data/Pond.c /lib/Data/Pond.o Data-Pond-0.005/Build.PL000444001750001750 375213133614716 14665 0ustar00zeframzefram000000000000{ use 5.006; } use warnings; use strict; use Module::Build; Module::Build->subclass(code => q{ unless(__PACKAGE__->can("cbuilder")) { *cbuilder = sub { $_[0]->_cbuilder or die "no C support" }; } unless(__PACKAGE__->can("have_c_compiler")) { *have_c_compiler = sub { my $cb = eval { $_[0]->cbuilder }; return $cb && $cb->have_compiler; }; } if($Module::Build::VERSION < 0.33) { # Older versions of Module::Build have a bug where if the # cbuilder object is used at Build.PL time (which it will # be for this distribution due to the logic in # ->find_xs_files) then that object can be dumped to the # build_params file, and then at Build time it will # attempt to use the dumped blessed object without loading # the ExtUtils::CBuilder class that is needed to make it # work. *write_config = sub { delete $_[0]->{properties}->{_cbuilder}; return $_[0]->SUPER::write_config; }; } sub find_xs_files { my($self) = @_; return {} unless $self->have_c_compiler; return $self->SUPER::find_xs_files; } })->new( module_name => "Data::Pond", license => "perl", configure_requires => { "Module::Build" => 0, "perl" => "5.008", "strict" => 0, "warnings" => 0, }, configure_recommends => { "ExtUtils::CBuilder" => "0.15", }, build_requires => { "Module::Build" => 0, "Test::More" => 0, "perl" => "5.008", "strict" => 0, "warnings" => 0, }, build_recommends => { "ExtUtils::CBuilder" => "0.15", }, requires => { "Exporter" => 0, "Carp" => 0, "Params::Classify" => 0, "parent" => 0, "perl" => "5.008", "strict" => 0, "warnings" => 0, }, recommends => { "XSLoader" => 0, }, needs_compiler => 0, dynamic_config => 0, meta_add => { distribution_type => "module" }, meta_merge => { "meta-spec" => { version => "2" }, resources => { bugtracker => { mailto => "bug-Data-Pond\@rt.cpan.org", web => "https://rt.cpan.org/Public/Dist/". "Display.html?Name=Data-Pond", }, }, }, sign => 1, )->create_build_script; 1; Data-Pond-0.005/Changes000444001750001750 365013133614716 14661 0ustar00zeframzefram000000000000version 0.005; 2017-07-19 * port to Perl 5.19.4, where the C type of array indices has changed * update test suite to not rely on . in @INC, which is no longer necessarily there from Perl 5.25.7 * no longer include a Makefile.PL in the distribution * in META.{yml,json}, point to public bug tracker * use cBOOL() where appropriate version 0.004; 2012-02-05 * in XS, declare "PROTOTYPES: DISABLE" to prevent automatic generation of unintended prototypes * in t/setup_pp.pl, avoid a warning that occurs if XSLoader::load() is given no arguments, which is now a valid usage * explicitly state version required of Params::Classify * in documentation, note that data structures for Pond can't be cyclic * correct a typo in documentation * correct dynamic_config setting to 0 * include META.json in distribution * convert .cvsignore to .gitignore * add MYMETA.json to .cvsignore version 0.003; 2010-10-20 * use full stricture in test suite * in Build.PL, explicitly declare configure-time requirements * in XS, use newSVpvs() and sv_catpvs_nomg() wherever appropriate * in XS, use PERL_NO_GET_CONTEXT for efficiency * also test POD coverage of pure Perl implementation * in Build.PL, explicitly set needs_compiler to avoid bogus auto-dependency on ExtUtils::CBuilder * add MYMETA.yml to .cvsignore version 0.002; 2009-11-04 * bugfix: in XS implementation, avoid memory leak when parsing hashes * port to Perl 5.11, supporting new first-class regexp objects in type checking * correct example of pond_write_datum options in synopsis * check for required Perl version at runtime * remove bogus "exit 0" from Build.PL version 0.001; 2009-05-15 * bugfix: correct flags on exported regexps (lack of /x caused these regexps to match the wrong things, but did not affect parsing with pond_read_datum()) version 0.000; 2009-05-14 * initial released version Data-Pond-0.005/MANIFEST000444001750001750 40213133614716 14467 0ustar00zeframzefram000000000000.gitignore Build.PL Changes MANIFEST META.json META.yml README lib/Data/Pond.pm lib/Data/Pond.xs t/error.t t/error_pp.t t/expr.t t/expr_pp.t t/pod_cvg.t t/pod_cvg_pp.t t/pod_syn.t t/setup_pp.pl t/undef.t t/undef_pp.t SIGNATURE Added here by Module::Build Data-Pond-0.005/META.json000444001750001750 313513133614716 15005 0ustar00zeframzefram000000000000{ "abstract" : "Perl-based open notation for data", "author" : [ "Andrew Main (Zefram) " ], "dynamic_config" : 0, "generated_by" : "Module::Build version 0.4224", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "Data-Pond", "prereqs" : { "build" : { "requires" : { "Module::Build" : "0", "Test::More" : "0", "perl" : "5.008", "strict" : "0", "warnings" : "0" } }, "configure" : { "requires" : { "Module::Build" : "0", "perl" : "5.008", "strict" : "0", "warnings" : "0" } }, "runtime" : { "recommends" : { "XSLoader" : "0" }, "requires" : { "Carp" : "0", "Exporter" : "0", "Params::Classify" : "0", "parent" : "0", "perl" : "5.008", "strict" : "0", "warnings" : "0" } } }, "provides" : { "Data::Pond" : { "file" : "lib/Data/Pond.pm", "version" : "0.005" } }, "release_status" : "stable", "resources" : { "bugtracker" : { "mailto" : "bug-Data-Pond@rt.cpan.org", "web" : "https://rt.cpan.org/Public/Dist/Display.html?Name=Data-Pond" }, "license" : [ "http://dev.perl.org/licenses/" ] }, "version" : "0.005", "x_serialization_backend" : "JSON::PP version 2.93" } Data-Pond-0.005/META.yml000444001750001750 165213133614716 14637 0ustar00zeframzefram000000000000--- abstract: 'Perl-based open notation for data' author: - 'Andrew Main (Zefram) ' build_requires: Module::Build: '0' Test::More: '0' perl: '5.008' strict: '0' warnings: '0' configure_requires: Module::Build: '0' perl: '5.008' strict: '0' warnings: '0' dynamic_config: 0 generated_by: 'Module::Build version 0.4224, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Data-Pond provides: Data::Pond: file: lib/Data/Pond.pm version: '0.005' recommends: XSLoader: '0' requires: Carp: '0' Exporter: '0' Params::Classify: '0' parent: '0' perl: '5.008' strict: '0' warnings: '0' resources: bugtracker: https://rt.cpan.org/Public/Dist/Display.html?Name=Data-Pond license: http://dev.perl.org/licenses/ version: '0.005' x_serialization_backend: 'CPAN::Meta::YAML version 0.012' Data-Pond-0.005/README000444001750001750 526113133614716 14246 0ustar00zeframzefram000000000000NAME Data::Pond - Perl-based open notation for data DESCRIPTION This module is concerned with representing data structures in a textual notation known as "Pond" (Perl-based open notation for data). The notation is a strict subset of Perl expression syntax, but is intended to have language-independent use. It is similar in spirit to JSON, which is based on JavaScript, but Pond represents fewer data types directly. The data that can be represented in Pond consist of strings (of characters), arrays, and string-keyed hashes. Arrays and hashes can recursively (but not cyclically) contain any of these kinds of data. This does not cover the full range of data types that Perl or other languages can handle, but is intended to be a limited, fixed repertoire of data types that many languages can readily process. It is intended that more complex data can be represented using these basic types. The arrays and hashes provide structuring facilities (ordered and unordered collections, respectively), and strings are a convenient way to represent atomic data. The Pond syntax is a subset of Perl expression syntax, consisting of string literals and constructors for arrays and hashes. Strings may be single-quoted or double-quoted, or may be decimal integer literals. Double-quoted strings are restricted in which backslash sequences they can use: the permitted ones are the single-character ones (such as "\n"), "\x" sequences (such as "\xe3" and "\x{e3}"), and octal digit sequences (such as "\010"). Non-ASCII characters are acceptable in quoted strings. Strings may also appear as pure-ASCII barewords, when they directly precede "=>" in an array or hash constructor. Array ("[]") and hash ("{}") constructors must contain data items separated by "," and "=>" commas, and can have a trailing comma but not adjacent commas. Whitespace is permitted where Perl allows it. Control characters are not permitted, except for whitespace outside strings. A Pond expression can be "eval"ed by Perl to yield the data item that it represents, but this is not the recommended way to do it. Any use of "eval" on data opens up security issues. Instead use the "pond_read_datum" function of this module, which does not use Perl's parser but directly parses the restricted Pond syntax. This module is implemented in XS, with a pure Perl backup version for systems that can't handle XS. INSTALLATION perl Build.PL ./Build ./Build test ./Build install AUTHOR Andrew Main (Zefram) COPYRIGHT Copyright (C) 2009 PhotoBox Ltd Copyright (C) 2010, 2012, 2017 Andrew Main (Zefram) LICENSE This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Data-Pond-0.005/SIGNATURE000644001750001750 341613133614726 14655 0ustar00zeframzefram000000000000This file contains message digests of all files listed in MANIFEST, signed via the Module::Signature module, version 0.81. To verify the content in this distribution, first make sure you have Module::Signature installed, then type: % cpansign -v It will check each file's integrity, as well as the signature's validity. If "==> Signature verified OK! <==" is not displayed, the distribution may already have been compromised, and you should not run its Makefile.PL or Build.PL. -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 SHA1 342aaa3957fa70337aa38354380cf010a4e80fce .gitignore SHA1 d70a0178b6a0899844f165a6ce8114ea5a65aa41 Build.PL SHA1 6fad37485254a071e1028eb2cd3327a136bfbc4e Changes SHA1 0b1c66e028a1f1c59ab80034a593813ef11106ee MANIFEST SHA1 5a6fd5afaf006338fc3a60f96add90eeda7453a5 META.json SHA1 d7e4f0d7588aecdbd8e3a6ded1e434fc75172791 META.yml SHA1 4793e19afdb959271ec799fb774895f445e2ce88 README SHA1 bee83ca4ac2711d97f9f9adb020204d75420923f lib/Data/Pond.pm SHA1 15e8efc0fc1baf867331c3e2ea8a662ef0ef3c51 lib/Data/Pond.xs SHA1 ab7c678d9ef301a801ac51969503e8593c708ae7 t/error.t SHA1 a1db817b20659750c1acc66816815e9604286b3a t/error_pp.t SHA1 05b63917afb7f9f1ed69ebc9b0f53ad0a26ce3bb t/expr.t SHA1 96c2bb83a066e0299d144c3510c487203eacb486 t/expr_pp.t SHA1 904d9a4f76525e2303e4b0c168c68230f223c8de t/pod_cvg.t SHA1 8b0ef0af30cd5064cf1b3d57c5fdbab11f8c567c t/pod_cvg_pp.t SHA1 65c75abdef6f01a5d1588a307f2ddfe2333dc961 t/pod_syn.t SHA1 32df4a9820e3883e6d89324939a7994855eaa9fe t/setup_pp.pl SHA1 ab1a496e36f7fa2f532ce716aa737a5bb2935e69 t/undef.t SHA1 0c10c3a71cd614c7205f22182d469d81a9f4b503 t/undef_pp.t -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iEYEARECAAYFAllvGc4ACgkQOV9mt2VyAVEpvwCeKasCL7zXVC3ojfNU6Dlt9kPr jqcAoIQCxTS8n8GJdnJ5ohnXoHg5oLh2 =ITsh -----END PGP SIGNATURE----- Data-Pond-0.005/lib000755001750001750 013133614716 13773 5ustar00zeframzefram000000000000Data-Pond-0.005/lib/Data000755001750001750 013133614716 14644 5ustar00zeframzefram000000000000Data-Pond-0.005/lib/Data/Pond.pm000444001750001750 3322313133614716 16262 0ustar00zeframzefram000000000000=head1 NAME Data::Pond - Perl-based open notation for data =head1 SYNOPSIS use Data::Pond qw($pond_datum_rx); if($expr =~ /\A$pond_datum_rx\z/o) { ... # and other regular expressions use Data::Pond qw(pond_read_datum pond_write_datum); $datum = pond_read_datum($text); $text = pond_write_datum($datum); $text = pond_write_datum($datum, { indent => 0 }); =head1 DESCRIPTION This module is concerned with representing data structures in a textual notation known as "Pond" (I

erl-based Ipen Iotation for Iata). The notation is a strict subset of Perl expression syntax, but is intended to have language-independent use. It is similar in spirit to JSON, which is based on JavaScript, but Pond represents fewer data types directly. The data that can be represented in Pond consist of strings (of characters), arrays, and string-keyed hashes. Arrays and hashes can recursively (but not cyclically) contain any of these kinds of data. This does not cover the full range of data types that Perl or other languages can handle, but is intended to be a limited, fixed repertoire of data types that many languages can readily process. It is intended that more complex data can be represented using these basic types. The arrays and hashes provide structuring facilities (ordered and unordered collections, respectively), and strings are a convenient way to represent atomic data. The Pond syntax is a subset of Perl expression syntax, consisting of string literals and constructors for arrays and hashes. Strings may be single-quoted or double-quoted, or may be decimal integer literals. Double-quoted strings are restricted in which backslash sequences they can use: the permitted ones are the single-character ones (such as C<\n>), C<\x> sequences (such as C<\xe3> and C<\x{e3}>), and octal digit sequences (such as C<\010>). Non-ASCII characters are acceptable in quoted strings. Strings may also appear as pure-ASCII barewords, when they directly precede C<< => >> in an array or hash constructor. Array (C<[]>) and hash (C<{}>) constructors must contain data items separated by C<,> and C<< => >> commas, and can have a trailing comma but not adjacent commas. Whitespace is permitted where Perl allows it. Control characters are not permitted, except for whitespace outside strings. A Pond expression can be Ced by Perl to yield the data item that it represents, but this is not the recommended way to do it. Any use of C on data opens up security issues. Instead use the L function of this module, which does not use Perl's parser but directly parses the restricted Pond syntax. This module is implemented in XS, with a pure Perl backup version for systems that can't handle XS. =cut package Data::Pond; { use 5.008; } use warnings; use strict; our $VERSION = "0.005"; use parent "Exporter"; our @EXPORT_OK = qw( $pond_string_rx $pond_ascii_string_rx $pond_array_rx $pond_ascii_array_rx $pond_hash_rx $pond_ascii_hash_rx $pond_datum_rx $pond_ascii_datum_rx pond_read_datum pond_write_datum ); =head1 REGULAR EXPRESSIONS Each of these regular expressions corresponds precisely to part of Pond syntax. The regular expressions do not include any anchors, so to check whether an entire string matches a production you must supply the anchors yourself. The regular expressions with C<_ascii_> in the name match the subset of the grammar that uses only ASCII characters. All Pond data can be expressed using only ASCII characters. =over =item $pond_string_rx =item $pond_ascii_string_rx A string literal. This may be a double-quoted string, a single-quoted string, or a decimal integer literal. It does not accept barewords. =cut my $pond_optwsp_rx = qr/[\t\n\f\r ]*/; my $pond_dqstringchar_rx = qr/[\ -\!\#\%-\?A-\[\]-\~\x{a1}-\x{7fffffff}]/; my $pond_dqstring_rx = qr/(?>"(?: $pond_dqstringchar_rx+ |\\(?:[\ -befnrt\{-\~\x{a1}-\x{7fffffff}] |x(?:[0-9a-fA-F]|\{[0-9a-fA-F]+\})) )*")/x; my $pond_ascii_dqstring_rx = qr/(?>"(?: [\ -\!\#\%-\?A-\[\]-\~]+ |\\(?:[\ -befnrt\{-\~] |x(?:[0-9a-fA-F]|\{[0-9a-fA-F]+\})) )*")/x; my $pond_sqstringchar_rx = qr/[\ -\&\(-\[\]-\~\x{a1}-\x{7fffffff}]/; my $pond_sqstring_rx = qr/(?>'(?: $pond_sqstringchar_rx+ |\\[\ -\~\x{a1}-\x{7fffffff}] )*')/x; my $pond_ascii_sqstring_rx = qr/(?>'(?: [\ -\&\(-\[\]-\~]+ |\\[\ -\~] )*')/x; my $pond_number_rx = qr/0|[1-9][0-9]*/; our $pond_string_rx = qr/$pond_dqstring_rx |$pond_sqstring_rx |$pond_number_rx/xo; our $pond_ascii_string_rx = qr/$pond_ascii_dqstring_rx |$pond_ascii_sqstring_rx |$pond_number_rx/xo; my $pond_bareword_rx = qr/(?>[A-Za-z_][0-9A-Za-z_]*(?=$pond_optwsp_rx=>))/o; my $pond_interior_string_rx = qr/$pond_bareword_rx|$pond_string_rx/o; my $pond_ascii_interior_string_rx = qr/$pond_bareword_rx|$pond_ascii_string_rx/o; =item $pond_array_rx =item $pond_ascii_array_rx An array C<[]> constructor. =cut my $pond_interior_datum_rx = do { use re "eval"; qr/$pond_bareword_rx|(??{$Data::Pond::pond_datum_rx})/o }; my $pond_ascii_interior_datum_rx = do { use re "eval"; qr/$pond_bareword_rx|(??{$Data::Pond::pond_ascii_datum_rx})/o }; my $pond_comma_rx = qr/,|=>/; our $pond_array_rx = qr/(?>\[$pond_optwsp_rx (?>$pond_interior_datum_rx$pond_optwsp_rx $pond_comma_rx$pond_optwsp_rx)* (?:$pond_interior_datum_rx$pond_optwsp_rx)? \])/xo; our $pond_ascii_array_rx = qr/(?>\[$pond_optwsp_rx (?>$pond_ascii_interior_datum_rx$pond_optwsp_rx $pond_comma_rx$pond_optwsp_rx)* (?:$pond_ascii_interior_datum_rx$pond_optwsp_rx)? \])/xo; =item $pond_hash_rx =item $pond_ascii_hash_rx A hash C<{}> constructor. =cut my $pond_hashelem_rx = qr/ $pond_interior_string_rx$pond_optwsp_rx $pond_comma_rx$pond_optwsp_rx$pond_interior_datum_rx /xo; my $pond_ascii_hashelem_rx = qr/ $pond_ascii_interior_string_rx$pond_optwsp_rx $pond_comma_rx$pond_optwsp_rx$pond_ascii_interior_datum_rx /xo; our $pond_hash_rx = qr/(?>\{$pond_optwsp_rx (?>$pond_hashelem_rx$pond_optwsp_rx$pond_comma_rx$pond_optwsp_rx)* (?:$pond_hashelem_rx$pond_optwsp_rx)? \})/xo; our $pond_ascii_hash_rx = qr/(?>\{$pond_optwsp_rx (?>$pond_ascii_hashelem_rx$pond_optwsp_rx$pond_comma_rx$pond_optwsp_rx)* (?:$pond_ascii_hashelem_rx$pond_optwsp_rx)? \})/xo; =item $pond_datum_rx =item $pond_ascii_datum_rx Any permitted expression. This may be a string literal, array constructor, or hash constructor. =cut our $pond_datum_rx = qr/$pond_string_rx |$pond_array_rx |$pond_hash_rx/xo; our $pond_ascii_datum_rx = qr/$pond_ascii_string_rx |$pond_ascii_array_rx |$pond_ascii_hash_rx/xo; =back =cut eval { local $SIG{__DIE__}; require XSLoader; XSLoader::load(__PACKAGE__, $VERSION); }; if($@ eq "") { close(DATA); } else { (my $filename = __FILE__) =~ tr# -~##cd; local $/ = undef; my $pp_code = "#line 223 \"$filename\"\n".; close(DATA); { local $SIG{__DIE__}; eval $pp_code; } die $@ if $@ ne ""; } 1; __DATA__ use Params::Classify 0.000 qw(is_undef is_string is_ref); =head1 FUNCTIONS =over =item pond_read_datum(TEXT) I is a character string. This function parses it as a Pond-encoded datum, with optional surrounding whitespace, returning the represented item as a Perl native datum. Cs if a malformed item is encountered. =cut my %str_decode = ( "a" => "\a", "b" => "\b", "t" => "\t", "n" => "\n", "f" => "\f", "r" => "\r", "e" => "\e", ); sub _subexpr_skip_ws($) { my($exprref) = @_; $$exprref =~ /\G[\t\n\f\r ]+/gc; } sub _subexpr_datum($); sub _subexpr_datum($) { my($exprref) = @_; if($$exprref =~ /\G([A-Za-z_][0-9A-Za-z_]*)(?=[\t\n\f\r ]*=>)/gc) { return $1; } elsif($$exprref =~ /\G\"/gc) { my $datum = ""; until($$exprref =~ /\G\"/gc) { if($$exprref =~ /\G\\([0-7]{1,3})/gc) { $datum .= chr(oct($1)); } elsif($$exprref =~ /\G\\x([0-9a-fA-F]{1,2})/gc) { $datum .= chr(hex($1)); } elsif($$exprref =~ /\G\\x\{([0-9a-fA-F]+)\}/gc) { my $hexval = $1; unless($hexval =~ /\A0*(?:0 |[1-7][0-9a-fA-F]{0,7} |[8-9a-fA-F][0-9a-fA-F]{0,6} )\z/x) { die "Pond constraint error: ". "invalid character\n"; } $datum .= chr(hex($hexval)); } elsif($$exprref =~ /\G\\([a-zA-Z])/gc) { my $c = $str_decode{$1}; die "Pond syntax error\n" unless defined $c; $datum .= $c; } elsif($$exprref =~ /\G\\([\ -\~\x{a1}-\x{7fffffff}])/gc) { $datum .= $1; } elsif($$exprref =~ /\G($pond_dqstringchar_rx+)/ogc) { $datum .= $1; } else { die "Pond syntax error\n" } } return $datum; } elsif($$exprref =~ /\G\'/gc) { my $datum = ""; until($$exprref =~ /\G\'/gc) { if($$exprref =~ /\G\\([\'\\])/gc) { $datum .= $1; } elsif($$exprref =~ /\G(\\|$pond_sqstringchar_rx+)/ogc) { $datum .= $1; } else { die "Pond syntax error\n" } } return $datum; } elsif($$exprref =~ /\G(0|[1-9][0-9]*)/gc) { return $1; } elsif($$exprref =~ /\G([\[\{])/gc) { my $type = $1 eq "[" ? "ARRAY" : "HASH"; my $close = $1 eq "[" ? qr/\]/ : qr/\}/; my @data; while(1) { _subexpr_skip_ws($exprref); last if $$exprref =~ /\G$close/gc; push @data, _subexpr_datum($exprref); _subexpr_skip_ws($exprref); last if $$exprref =~ /\G$close/gc; die "Pond syntax error\n" unless $$exprref =~ /\G(?:,|=>)/gc; } return \@data if $type eq "ARRAY"; die "Pond constraint error: ". "odd number of elements in hash constructor\n" if scalar(@data) & 1; for(my $i = @data; $i; ) { $i -= 2; die "Pond constraint error: non-string hash key\n" unless is_string($data[$i]); } return {@data}; } else { die "Pond syntax error\n" } } sub pond_read_datum($) { my($text) = @_; die "Pond data error: text isn't a string\n" unless is_string($text); _subexpr_skip_ws(\$text); my $datum = _subexpr_datum(\$text); _subexpr_skip_ws(\$text); die "Pond syntax error\n" unless $text =~ /\G\z/gc; return $datum; } =item pond_write_datum(DATUM[, OPTIONS]) I is a Perl native datum. This function serialises it as a character string using Pond encoding. The data to be serialised can recursively contain Perl strings, arrays, and hashes. Numbers are implicitly stringified, and C is treated as the empty string. Cs if an unserialisable datum is encountered. I, if present, must be a reference to a hash, containing options that control the serialisation process. The recognised options are: =over =item B If C (which is the default), no optional whitespace will be added. Otherwise it must be a non-negative integer, and the datum will be laid out with whitespace (where it is optional) to illustrate the structure by indentation. The number given must be the number of leading spaces on the line on which the resulting element will be placed. If whitespace is added, the element will be arranged to end on a line of the same indentation, and all intermediate lines will have greater indentation. =item B If false (the default), C will be treated as invalid data. If true, C will be serialised as an empty string. =item B If false (the default), the datum will be expressed using only ASCII characters. If true, non-ASCII characters may be used in string literals. =back =cut my %str_encode = ( "\t" => "\\t", "\n" => "\\n", "\"" => "\\\"", "\$" => "\\\$", "\@" => "\\\@", "\\" => "\\\\", ); foreach(0x00..0x1f, 0x7f..0xa0) { my $c = chr($_); $str_encode{$c} = sprintf("\\x%02x", $_) unless exists $str_encode{$c}; } sub _strdatum_to_string($$) { my($str, $options) = @_; return $str if $str =~ /\A(?:0|[1-9][0-9]{0,8})\z/; die "Pond data error: invalid character\n" unless $str =~ /\A[\x{0}-\x{7fffffff}]*\z/; $str =~ s/([\x00-\x1f\"\$\@\\\x7f-\xa0])/$str_encode{$1}/eg; $str =~ s/([^\x00-\x7f])/sprintf("\\x{%02x}", ord($1))/eg unless $options->{unicode}; return "\"$str\""; } sub _strdatum_to_bareword($$) { return $_[0] =~ /\A[A-Za-z_][0-9A-Za-z_]*\z/ ? $_[0] : &_strdatum_to_string; } sub pond_write_datum($;$); sub pond_write_datum($;$) { my($datum, $options) = @_; $options = {} unless defined $options; if(is_undef($datum) && $options->{undef_is_empty}) { return '""'; } elsif(is_string($datum)) { return _strdatum_to_string($datum, $options); } elsif(is_ref($datum, "ARRAY")) { return "[]" if @$datum == 0; if(defined $options->{indent}) { my $indent = $options->{indent}; my $subindent = $indent + 4; my $indent_str = "\n"." "x$indent; my $subindent_str = "\n"." "x$subindent; my $suboptions = { %$options, indent => $subindent }; return join("", "[", (map { ( $subindent_str, pond_write_datum($_, $suboptions), ",", ) } @$datum), $indent_str, "]"); } else { return "[".join(",", map { pond_write_datum($_, $options) } @$datum)."]"; } } elsif(is_ref($datum, "HASH")) { return "{}" if keys(%$datum) == 0; if(defined $options->{indent}) { my $indent = $options->{indent}; my $subindent = $indent + 4; my $indent_str = "\n"." "x$indent; my $subindent_str = "\n"." "x$subindent; my $suboptions = { %$options, indent => $subindent }; return join("", "{", (map { ( $subindent_str, _strdatum_to_bareword($_, $options), " => ", pond_write_datum($datum->{$_}, $suboptions), ",", ) } sort keys %$datum), $indent_str, "}"); } else { return "{".join(",", map { _strdatum_to_bareword($_, $options)."=>". pond_write_datum($datum->{$_}, $options) } sort keys %$datum)."}"; } } else { die "Pond data error: unsupported data type\n"; } } =back =head1 SEE ALSO L, L, L =head1 AUTHOR Andrew Main (Zefram) =head1 COPYRIGHT Copyright (C) 2009 PhotoBox Ltd Copyright (C) 2010, 2012, 2017 Andrew Main (Zefram) =head1 LICENSE This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; Data-Pond-0.005/lib/Data/Pond.xs000444001750001750 5240213133614716 16300 0ustar00zeframzefram000000000000#define PERL_NO_GET_CONTEXT 1 #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #define PERL_VERSION_DECIMAL(r,v,s) (r*1000000 + v*1000 + s) #define PERL_DECIMAL_VERSION \ PERL_VERSION_DECIMAL(PERL_REVISION,PERL_VERSION,PERL_SUBVERSION) #define PERL_VERSION_GE(r,v,s) \ (PERL_DECIMAL_VERSION >= PERL_VERSION_DECIMAL(r,v,s)) #ifndef cBOOL # define cBOOL(x) ((bool)!!(x)) #endif /* !cBOOL */ #ifndef hv_fetchs # define hv_fetchs(hv, keystr, lval) \ hv_fetch(hv, ""keystr"", sizeof(keystr)-1, lval) #endif /* !hv_fetchs */ #ifndef newSVpvs # define newSVpvs(string) newSVpvn(""string"", sizeof(string)-1) #endif /* !newSVpvs */ #ifndef sv_catpvs_nomg # define sv_catpvs_nomg(sv, string) \ sv_catpvn_nomg(sv, ""string"", sizeof(string)-1) #endif /* !sv_catpvs_nomg */ #if PERL_VERSION_GE(5,19,4) typedef SSize_t array_ix_t; #else /* <5.19.4 */ typedef I32 array_ix_t; #endif /* <5.19.4 */ /* parameter classification */ #define sv_is_glob(sv) (SvTYPE(sv) == SVt_PVGV) #if PERL_VERSION_GE(5,11,0) # define sv_is_regexp(sv) (SvTYPE(sv) == SVt_REGEXP) #else /* <5.11.0 */ # define sv_is_regexp(sv) 0 #endif /* <5.11.0 */ #define sv_is_undef(sv) (!sv_is_glob(sv) && !sv_is_regexp(sv) && !SvOK(sv)) #define sv_is_string(sv) \ (!sv_is_glob(sv) && !sv_is_regexp(sv) && \ (SvFLAGS(sv) & (SVf_IOK|SVf_NOK|SVf_POK|SVp_IOK|SVp_NOK|SVp_POK))) /* exceptions */ #define throw_utf8_error() croak("broken internal UTF-8 encoding\n") #define throw_syntax_error(p) croak("Pond syntax error\n") #define throw_constraint_error(MSG) croak("Pond constraint error: "MSG"\n") #define throw_data_error(MSG) croak("Pond data error: "MSG"\n") /* * string walking * * The parser deals with strings that are internally encoded using Perl's * extended form of UTF-8. It is not assumed that the encoding is * well-formed; encoding errors will result in an exception. The encoding * octets are treated as U8 type. * * Characters that are known to be in the ASCII range are in some places * processed as U8. General Unicode characters are processed as U32, with * the intent that the entire ISO-10646 31-bit range be handleable. Any * codepoint is accepted for processing, even the surrogates (which are * not legal in true UTF-8 encoding). Perl's extended UTF-8 extends to * 72-bit codepoints; encodings beyond the 31-bit range are translated to * codepoint U+80000000, whereby they are all treated as invalid. * * char_unicode() returns the codepoint represented by the character being * pointed at, or throws an exception if the encoding is malformed. * * To move on to the character following the one pointed at, use the core * macro UTF8SKIP(), as in (p + UTF8SKIP(p)). It assumes that the character * is properly encoded, so it is essential that char_unicode() has been * called on it first. * * Given an input SV (that is meant to be a string), pass it through * upgrade_sv() to return an SV that contains the string in UTF-8. This * could be either the same SV (if it is already UTF-8-encoded or contains * no non-ASCII characters) or a mortal upgraded copy. */ #define char_unicode(p) THX_char_unicode(aTHX_ p) static U32 THX_char_unicode(pTHX_ U8 *p) { U32 val = *p; U8 req_c1; int ncont; int i; if(!(val & 0x80)) return val; if(!(val & 0x40)) throw_utf8_error(); if(!(val & 0x20)) { if(!(val & 0x1e)) throw_utf8_error(); val &= 0x1f; ncont = 1; req_c1 = 0x00; } else if(!(val & 0x10)) { val &= 0x0f; ncont = 2; req_c1 = 0x20; } else if(!(val & 0x08)) { val &= 0x07; ncont = 3; req_c1 = 0x30; } else if(!(val & 0x04)) { val &= 0x03; ncont = 4; req_c1 = 0x38; } else if(!(val & 0x02)) { val &= 0x01; ncont = 5; req_c1 = 0x3c; } else if(!(val & 0x01)) { if(!(p[1] & 0x3e)) throw_utf8_error(); for(i = 6; i--; ) if((*++p & 0xc0) != 0x80) throw_utf8_error(); return 0x80000000; } else { U8 first_six = 0; for(i = 6; i--; ) { U8 ext = *++p; if((ext & 0xc0) != 0x80) throw_utf8_error(); first_six |= ext; } if(!(first_six & 0x3f)) throw_utf8_error(); for(i = 6; i--; ) if((*++p & 0xc0) != 0x80) throw_utf8_error(); return 0x80000000; } if(val == 0 && !(p[1] & req_c1)) throw_utf8_error(); for(i = ncont; i--; ) { U8 ext = *++p; if((ext & 0xc0) != 0x80) throw_utf8_error(); val = UTF8_ACCUMULATE(val, ext); } return val; } #define sv_cat_unichar(str, val) THX_sv_cat_unichar(aTHX_ str, val) static void THX_sv_cat_unichar(pTHX_ SV *str, U32 val) { STRLEN vlen; U8 *vstart, *voldend, *vnewend; vlen = SvCUR(str); vstart = (U8*)SvGROW(str, vlen+6+1); voldend = vstart + vlen; vnewend = uvuni_to_utf8_flags(voldend, val, UNICODE_ALLOW_ANY); *vnewend = 0; SvCUR_set(str, vnewend - vstart); } #define upgrade_sv(input) THX_upgrade_sv(aTHX_ input) static SV *THX_upgrade_sv(pTHX_ SV *input) { U8 *p, *end; STRLEN len; if(SvUTF8(input)) return input; p = (U8*)SvPV(input, len); for(end = p + len; p != end; p++) { if(*p & 0x80) { SV *output = sv_mortalcopy(input); sv_utf8_upgrade(output); return output; } } return input; } /* * Pond reading */ #define CHARATTR_WSP 0x01 #define CHARATTR_DQSPECIAL 0x02 #define CHARATTR_CONTROL 0x04 #define CHARATTR_HEXDIGIT 0x08 #define CHARATTR_WORDSTART 0x10 #define CHARATTR_WORDCONT 0x20 #define CHARATTR_DECDIGIT 0x40 #define CHARATTR_OCTDIGIT 0x80 static U8 const asciichar_attr[128] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, /* NUL to BEL */ 0x04, 0x05, 0x05, 0x04, 0x05, 0x05, 0x04, 0x04, /* BS to SI */ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, /* DLE to ETB */ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, /* CAN to US */ 0x01, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, /* SP to ' */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ( to / */ 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, /* 0 to 7 */ 0x68, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 to ? */ 0x02, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x30, /* @ to G */ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* H to O */ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* P to W */ 0x30, 0x30, 0x30, 0x00, 0x02, 0x00, 0x00, 0x30, /* X to _ */ 0x00, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x30, /* ` to g */ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* h to o */ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* p to w */ 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x04, /* x to DEL */ }; static int char_is_wsp(U8 c) { return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_WSP); } static int char_is_dqspecial(U8 c) { return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_DQSPECIAL); } static int char_is_control(U8 c) { return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_CONTROL); } static int unichar_is_control(U32 c) { return (c >= 0x80) ? c <= 0xa0 : (asciichar_attr[c] & CHARATTR_CONTROL); } static int char_is_wordstart(U8 c) { return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_WORDSTART); } static int char_is_wordcont(U8 c) { return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_WORDCONT); } static int char_is_decdigit(U8 c) { return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_DECDIGIT); } static int char_is_octdigit(U8 c) { return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_OCTDIGIT); } static int char_is_hexdigit(U8 c) { return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_HEXDIGIT); } static int hexdigit_value(U8 c) { return c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10; } static U8 *parse_opt_wsp(U8 *p) { while(char_is_wsp(*p)) p++; return p; } static U8 const asciichar_backslash[128] = { 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* NUL to BEL */ 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* BS to SI */ 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* DLE to ETB */ 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* CAN to US */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* SP to ' */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ( to / */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0 to 7 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 8 to ? */ 0x40, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* @ to G */ 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* H to O */ 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* P to W */ 0xfd, 0xfd, 0xfd, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* X to _ */ 0x60, 0x07, 0x08, 0xfd, 0xfd, 0x1b, 0x0c, 0xfd, /* ` to g */ 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0x0a, 0xfd, /* h to o */ 0xfd, 0xfd, 0x0d, 0xfd, 0x09, 0xfd, 0xfd, 0xfd, /* p to w */ 0xfe, 0xfd, 0xfd, 0x7b, 0x7c, 0x7d, 0x7e, 0xfd, /* x to DEL */ }; #define parse_dqstring(end, pp) THX_parse_dqstring(aTHX_ end, pp) static SV *THX_parse_dqstring(pTHX_ U8 *end, U8 **pp) { U8 *p = *pp; SV *datum = sv_2mortal(newSVpvs("")); SvUTF8_on(datum); while(1) { U8 c = *p, e; if(p == end || char_is_control(c)) throw_syntax_error(p); if(!char_is_dqspecial(c)) { U8 *q = p; do { U32 val = char_unicode(q); if(unichar_is_control(val)) throw_syntax_error(q); q += UTF8SKIP(q); c = *q; } while(q != end && !char_is_dqspecial(c)); sv_catpvn_nomg(datum, (char*)p, q-p); p = q; continue; } if(c == '"') break; if(c != '\\') throw_syntax_error(p); c = *++p; if(p == end) throw_syntax_error(p); if(c & 0x80) { U32 val = char_unicode(p); if(unichar_is_control(val)) throw_syntax_error(q); /* character will be treated as literal anyway */ continue; } e = asciichar_backslash[c]; if(e == 0xff) { U32 val = c & 7; c = *++p; if(char_is_octdigit(c)) { p++; val = (val << 3) | (c & 7); c = *p; if(char_is_octdigit(c)) { p++; val = (val << 3) | (c & 7); } } sv_cat_unichar(datum, val); } else if(e == 0xfe) { U32 val; c = *++p; if(char_is_hexdigit(c)) { p++; val = hexdigit_value(c); c = *p; if(char_is_hexdigit(c)) { p++; val = (val << 4) | hexdigit_value(c); } } else if(c == '{') { p++; c = *p; if(!char_is_hexdigit(c)) throw_syntax_error(p); val = 0; do { if(val & 0x78000000) throw_constraint_error( "invalid character"); val = (val << 4) | hexdigit_value(c); c = *++p; } while(char_is_hexdigit(c)); if(c != '}') throw_syntax_error(p); p++; } else { throw_syntax_error(p); } sv_cat_unichar(datum, val); } else if(e == 0xfd) { throw_syntax_error(p); } else { p++; sv_catpvn_nomg(datum, (char*)&e, 1); } } *pp = p+1; return datum; } #define parse_sqstring(end, pp) THX_parse_sqstring(aTHX_ end, pp) static SV *THX_parse_sqstring(pTHX_ U8 *end, U8 **pp) { U8 *p = *pp; SV *datum = sv_2mortal(newSVpvs("")); SvUTF8_on(datum); while(1) { U8 c = *p; if(p == end || char_is_control(c)) throw_syntax_error(p); if(c == '\'') break; if(c != '\\') { U8 *q = p; do { U32 val = char_unicode(q); if(unichar_is_control(val)) throw_syntax_error(q); q += UTF8SKIP(q); c = *q; } while(q != end && c != '\'' && c != '\\'); sv_catpvn_nomg(datum, (char*)p, q-p); p = q; } else { c = p[1]; if(c == '\\' || c == '\'') p++; sv_catpvn_nomg(datum, (char*)p, 1); p++; } } *pp = p+1; return datum; } #define array_to_hash(array) THX_array_to_hash(aTHX_ array) static SV *THX_array_to_hash(pTHX_ AV *array) { HV *hash; SV *href; array_ix_t alen, i; alen = av_len(array); if(!(alen & 1)) throw_constraint_error( "odd number of elements in hash constructor"); hash = newHV(); href = sv_2mortal(newRV_noinc((SV*)hash)); for(i = 0; i <= alen; i += 2) { SV **key_ptr = av_fetch(array, i, 0); STRLEN key_len; char *key_str; SV *value; if(!key_ptr || !sv_is_string(*key_ptr)) throw_constraint_error("non-string hash key"); key_str = SvPV(*key_ptr, key_len); value = *av_fetch(array, i+1, 0); if(!hv_store(hash, key_str, -key_len, SvREFCNT_inc(value), 0)) SvREFCNT_dec(value); } return href; } #define parse_datum(end, pp) THX_parse_datum(aTHX_ end, pp) static SV *THX_parse_datum(pTHX_ U8 *end, U8 **pp); static SV *THX_parse_datum(pTHX_ U8 *end, U8 **pp) { U8 *p = *pp; U8 c = *p; SV *datum; if(c == '"') { p++; datum = parse_dqstring(end, &p); } else if(c == '\'') { p++; datum = parse_sqstring(end, &p); } else if(c == '[' || c == '{') { int is_hash = c == '{'; U8 close = is_hash ? '}' : ']'; AV *array = newAV(); sv_2mortal((SV*)array); p++; while(1) { p = parse_opt_wsp(p); if(*p == close) break; av_push(array, SvREFCNT_inc(parse_datum(end, &p))); p = parse_opt_wsp(p); if(*p == close) break; if(*p == ',') { p++; } else if(p[0] == '=' && p[1] == '>') { p += 2; } else { throw_syntax_error(p); } } p++; datum = is_hash ? array_to_hash(array) : sv_2mortal(newRV_inc((SV*)array)); } else if(c & 0x80) { throw_syntax_error(p); } else { U8 attr = asciichar_attr[c]; if(attr & CHARATTR_WORDSTART) { U8 *start = p++; U8 *q; while(char_is_wordcont(*p)) p++; q = parse_opt_wsp(p); if(!(q[0] == '=' && q[1] == '>')) throw_syntax_error(q); datum = sv_2mortal(newSVpvn((char*)start, p-start)); } else if(attr & CHARATTR_DECDIGIT) { U8 *start = p++; if(c == '0') { if(char_is_decdigit(*p)) throw_syntax_error(p); } else { while(char_is_decdigit(*p)) p++; } datum = sv_2mortal(newSVpvn((char*)start, p-start)); } else { throw_syntax_error(p); } } *pp = p; return datum; } /* * Pond writing */ struct writer_options { int indent; int undef_is_empty, unicode; }; static int pvn_is_integer(U8 *p, STRLEN len) { U8 *e = p + len; if(len == 0 || len > 9) return 0; if(*p == '0') return len == 1; for(; p != e; p++) { if(!char_is_decdigit(*p)) return 0; } return 1; } #define ASCIICHAR_QUOTE_LITERAL 0x00 #define ASCIICHAR_QUOTE_HEXPAIR 0x01 static U8 const asciichar_quote[128] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* NUL to BEL */ 0x01, 0x74, 0x6e, 0x01, 0x01, 0x01, 0x01, 0x01, /* BS to SI */ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* DLE to ETB */ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* CAN to US */ 0x00, 0x00, 0x22, 0x00, 0x24, 0x00, 0x00, 0x00, /* SP to ' */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ( to / */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0 to 7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 to ? */ 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* @ to G */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* H to O */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* P to W */ 0x00, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, /* X to _ */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ` to g */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* h to o */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* p to w */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, /* x to DEL */ }; static char const hexdig[16] = "0123456789abcdef"; #define serialise_as_string(wo, out, datum) \ THX_serialise_as_string(aTHX_ wo, out, datum) static void THX_serialise_as_string(pTHX_ struct writer_options *wo, SV *out, SV *datum) { U8 *p; STRLEN len; p = (U8*)SvPV(datum, len); if(pvn_is_integer(p, len)) { sv_catpvn_nomg(out, (char *)p, len); } else { U8 *e = p + len; U8 *lstart = p; sv_catpvs_nomg(out, "\""); while(p != e) { U8 c = *p; if(c & 0x80) { U32 val = char_unicode(p); if(val == 0x80000000) throw_data_error("invalid character"); if(val <= 0xa0 || !wo->unicode) { if(lstart != p) sv_catpvn_nomg(out, (char*)lstart, p-lstart); } p += UTF8SKIP(p); if(val <= 0xa0) { c = val; p--; goto hexpair; } if(!wo->unicode) { char hexbuf[12]; sprintf(hexbuf, "\\x{%02x}", (unsigned)val); sv_catpvn_nomg(out, hexbuf, strlen(hexbuf)); lstart = p; } } else { U8 quote = asciichar_quote[c]; if(quote == ASCIICHAR_QUOTE_LITERAL) { p++; continue; } if(lstart != p) sv_catpvn_nomg(out, (char*)lstart, p-lstart); if(quote == ASCIICHAR_QUOTE_HEXPAIR) { char hexbuf[4]; hexpair: hexbuf[0] = '\\'; hexbuf[1] = 'x'; hexbuf[2] = hexdig[c >> 4]; hexbuf[3] = hexdig[c & 0xf]; sv_catpvn_nomg(out, hexbuf, 4); } else { char bsbuf[2]; bsbuf[0] = '\\'; bsbuf[1] = (char)quote; sv_catpvn_nomg(out, bsbuf, 2); } lstart = ++p; } } if(lstart != p) sv_catpvn_nomg(out, (char*)lstart, p-lstart); sv_catpvs_nomg(out, "\""); } } static int pvn_is_bareword(U8 *p, STRLEN len) { U8 *e = p + len; if(!char_is_wordstart(*p)) return 0; while(++p != e) { if(!char_is_wordcont(*p)) return 0; } return 1; } #define serialise_as_bareword(wo, out, datum) \ THX_serialise_as_bareword(aTHX_ wo, out, datum) static void THX_serialise_as_bareword(pTHX_ struct writer_options *wo, SV *out, SV *datum) { U8 *p; STRLEN len; p = (U8*)SvPV(datum, len); if(pvn_is_bareword(p, len)) { sv_catpvn_nomg(out, (char *)p, len); } else { serialise_as_string(wo, out, datum); } } #define serialise_newline(wo, out) THX_serialise_newline(aTHX_ wo, out) static void THX_serialise_newline(pTHX_ struct writer_options *wo, SV *out) { int indent = wo->indent; if(indent != -1) { STRLEN cur = SvCUR(out); char *p = SvGROW(out, cur+indent+2) + cur; *p++ = '\n'; memset(p, ' ', indent); p[indent] = 0; SvCUR_set(out, cur+1+indent); } } #define serialise_datum(wo, out, datum) \ THX_serialise_datum(aTHX_ wo, out, datum) static void THX_serialise_datum(pTHX_ struct writer_options *wo, SV *out, SV *datum); #define serialise_array(wo, out, adatum) \ THX_serialise_array(aTHX_ wo, out, adatum) static void THX_serialise_array(pTHX_ struct writer_options *wo, SV *out, AV *adatum) { array_ix_t alen = av_len(adatum), pos; if(alen == -1) { sv_catpvs_nomg(out, "[]"); return; } sv_catpvs_nomg(out, "["); if(wo->indent != -1) wo->indent += 4; serialise_newline(wo, out); for(pos = 0; ; pos++) { serialise_datum(wo, out, *av_fetch(adatum, pos, 0)); if(pos == alen && wo->indent == -1) break; sv_catpvs_nomg(out, ","); if(pos == alen) break; serialise_newline(wo, out); } if(wo->indent != -1) wo->indent -= 4; serialise_newline(wo, out); sv_catpvs_nomg(out, "]"); } #define serialise_hash(wo, out, hdatum) \ THX_serialise_hash(aTHX_ wo, out, hdatum) static void THX_serialise_hash(pTHX_ struct writer_options *wo, SV *out, HV *hdatum) { AV *keys; U32 nelem = hv_iterinit(hdatum), pos; if(nelem == 0) { sv_catpvs_nomg(out, "{}"); return; } keys = newAV(); sv_2mortal((SV*)keys); av_extend(keys, nelem-1); for(pos = nelem; pos--; ) { SV *keysv = upgrade_sv( hv_iterkeysv(hv_iternext(hdatum))); av_push(keys, SvREFCNT_inc(keysv)); } sortsv(AvARRAY(keys), nelem, Perl_sv_cmp); sv_catpvs_nomg(out, "{"); if(wo->indent != -1) wo->indent += 4; serialise_newline(wo, out); for(pos = 0; ; pos++) { SV *keysv = *av_fetch(keys, pos, 0); STRLEN klen; char *key; serialise_as_bareword(wo, out, keysv); if(wo->indent == -1) { sv_catpvs_nomg(out, "=>"); } else { sv_catpvs_nomg(out, " => "); } key = SvPV(keysv, klen); serialise_datum(wo, out, *hv_fetch(hdatum, key, -klen, 0)); if(pos == nelem-1 && wo->indent == -1) break; sv_catpvs_nomg(out, ","); if(pos == nelem-1) break; serialise_newline(wo, out); } if(wo->indent != -1) wo->indent -= 4; serialise_newline(wo, out); sv_catpvs_nomg(out, "}"); } static void THX_serialise_datum(pTHX_ struct writer_options *wo, SV *out, SV *datum) { if(sv_is_undef(datum) && wo->undef_is_empty) { sv_catpvs_nomg(out, "\"\""); } else if(sv_is_string(datum)) { datum = upgrade_sv(datum); serialise_as_string(wo, out, datum); } else { if(!SvROK(datum)) throw_data_error("unsupported data type"); datum = SvRV(datum); if(SvOBJECT(datum)) throw_data_error("unsupported data type"); if(SvTYPE(datum) == SVt_PVAV) { serialise_array(wo, out, (AV*)datum); } else if(SvTYPE(datum) == SVt_PVHV) { serialise_hash(wo, out, (HV*)datum); } else { throw_data_error("unsupported data type"); } } } MODULE = Data::Pond PACKAGE = Data::Pond PROTOTYPES: DISABLE SV * pond_read_datum(SV *text_sv) PROTOTYPE: $ PREINIT: STRLEN text_len; U8 *p, *end; CODE: if(!sv_is_string(text_sv)) throw_data_error("text isn't a string"); text_sv = upgrade_sv(text_sv); p = (U8*)SvPV(text_sv, text_len); end = p + text_len; p = parse_opt_wsp(p); RETVAL = parse_datum(end, &p); p = parse_opt_wsp(p); if(p != end) throw_syntax_error(p); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * pond_write_datum(SV *datum, SV *options = 0) PROTOTYPE: $;$ PREINIT: struct writer_options wo = { -1, 0, 0 }; CODE: if(options) { HV *opthash; SV **item_ptr; if(!SvROK(options)) throw_data_error("option hash isn't a hash"); options = SvRV(options); if(SvOBJECT(options) || SvTYPE(options) != SVt_PVHV) throw_data_error("option hash isn't a hash"); opthash = (HV*)options; if((item_ptr = hv_fetchs(opthash, "indent", 0))) { SV *item = *item_ptr; if(!sv_is_undef(item)) { if(!sv_is_string(item)) throw_data_error( "indent option isn't a number"); wo.indent = SvIV(item); if(wo.indent < 0) throw_data_error( "indent option is negative"); } } if((item_ptr = hv_fetchs(opthash, "undef_is_empty", 0))) { SV *item = *item_ptr; wo.undef_is_empty = cBOOL(SvTRUE(item)); } if((item_ptr = hv_fetchs(opthash, "unicode", 0))) { SV *item = *item_ptr; wo.unicode = cBOOL(SvTRUE(item)); } } RETVAL = sv_2mortal(newSVpvs("")); SvUTF8_on(RETVAL); serialise_datum(&wo, RETVAL, datum); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL Data-Pond-0.005/t000755001750001750 013133614716 13470 5ustar00zeframzefram000000000000Data-Pond-0.005/t/error.t000444001750001750 146213133614716 15146 0ustar00zeframzefram000000000000use warnings; use strict; use Test::More tests => 43; BEGIN { use_ok "Data::Pond", qw(pond_read_datum pond_write_datum); } foreach( undef, [], {}, ) { eval { pond_read_datum(undef); }; like $@, qr/\APond data error: /; } foreach( *STDOUT, \"", sub{}, bless({},"main"), bless({},"ARRAY"), bless([],"main"), bless([],"HASH"), [ sub{} ], ) { eval { pond_write_datum($_, {}); }; like $@, qr/\APond data error: /; eval { pond_read_datum($_); }; like $@, qr/\APond data error: /; } foreach( "", " ", "foo", "undef", "foo=>", "1,", "[,]", "[,1]", "[1,,]", "[1,,2]", "'\x00'", "\"\x00\"", "'\t'", "\"\t\"", "'\n'", "\"\n\"", "'\x7f'", "\"\x7f\"", "'\x80'", "\"\x80\"", "'\xa0'", "\"\xa0\"", "\"\\c\"", ) { eval { pond_read_datum($_); }; like $@, qr/\APond syntax error\b/; } 1; Data-Pond-0.005/t/error_pp.t000444001750001750 14713133614716 15624 0ustar00zeframzefram000000000000use warnings; use strict; do "./t/setup_pp.pl" or die $@ || $!; do "./t/error.t" or die $@ || $!; 1; Data-Pond-0.005/t/expr.t000444001750001750 661113133614716 14774 0ustar00zeframzefram000000000000use warnings; use strict; use Test::More tests => 196; BEGIN { use_ok "Data::Pond", qw(pond_read_datum pond_write_datum); } is_deeply pond_read_datum($_), eval($_) foreach '""', '"abc"', '"a b"', "\"a'b\"", '"a\tb"', '"a\nb"', "\"a\\\\b\"", "\"a\\\"b\"", "\"a\\\$b\"", "\"a\\\*b\"", '"a\xe3b"', '"a\x00b"', '"a\x7fb"', '"a\x80b"', '"a\xa0b"', '"a\x{123}b"', "\"a\x{123}b\"", '"a\123b"', '"a\12b"', '"a\1b"', ' "abc" ', "''", "'abc'", "'a\"bc'", "'a\\bc'", "'a\\'bc'", "'a\\\\bc'", "'a\\\"bc'", "'a\x{123}b'", " 'abc' ", '123', '7', '0', '"0123"', '"00"', '"1234567890"', ' 123 ', '[]', '[1]', '[ 1 ]', '[1,]', '[ 1 , ]', '["a",2]', '["a",2,]', '["a",[2],]', ' [ "a" , [ 2 ] , ] ', '{}', '{1,2}', '{1,2,}', ' { 1 , 2 , } ', '{a=>b=>}', ' { a => b => } ', '{a=>[],b=>123}', '{a=>[],b=>123,}', '{" foo",123}'; is pond_write_datum(pond_read_datum($_), {unicode=>1}), $_ foreach '""', '"abc"', '"a b"', "\"a'b\"", '"a\tb"', '"a\nb"', "\"a\\\\b\"", "\"a\\\"b\"", "\"a\\\$b\"", "\"a*b\"", "\"a\xe3b\"", '"a\x00b"', '"a\x7fb"', '"a\x80b"', '"a\xa0b"', "\"a\x{123}b\"", '123', '7', '0', '"0123"', '"00"', '"1234567890"', '[]', '[1]', '["a",2]', '["a",[2]]', '{}', '{1=>2}', '{a=>"b"}', '{a=>[],b=>123}', '{a=>[],b=>"0123"}', '{a=>[],b=>"00"}', '{a=>[],b=>"1234567890"}', '{" foo"=>123}', "{\"z\x{123}Z\"=>[\"a\x{123}A\"]}"; is pond_write_datum(pond_read_datum($_), {indent=>0, unicode=>1}), $_ foreach '""', '"abc"', '"a b"', "\"a'b\"", '"a\tb"', '"a\nb"', "\"a\\\\b\"", "\"a\\\"b\"", "\"a\\\$b\"", "\"a*b\"", "\"a\xe3b\"", '"a\x00b"', '"a\x7fb"', '"a\x80b"', '"a\xa0b"', "\"a\x{123}b\"", '123', '7', '0', '"0123"', '"00"', '"1234567890"', '[]', "[\n 1,\n]", "[\n \"a\",\n 2,\n]", "[\n \"a\",\n [\n 2,\n ],\n]", "{}", "{\n 1 => 2,\n}", "{\n a => \"b\",\n}", "{\n a => [],\n b => 123,\n}", "{\n a => [],\n b => \"0123\",\n}", "{\n a => [],\n b => \"00\",\n}", "{\n a => [],\n b => \"1234567890\",\n}", "{\n \" foo\" => 123,\n}", "{\n \"z\x{123}Z\" => [\n \"a\x{123}A\",\n ],\n}"; is pond_write_datum(pond_read_datum($_), {}), $_ foreach '""', '"abc"', '"a b"', "\"a'b\"", '"a\tb"', '"a\nb"', "\"a\\\\b\"", "\"a\\\"b\"", "\"a\\\$b\"", "\"a*b\"", '"a\x{e3}b"', '"a\x00b"', '"a\x7fb"', '"a\x80b"', '"a\xa0b"', '"a\x{123}b"', '123', '7', '0', '"0123"', '"00"', '"1234567890"', '[]', '[1]', '["a",2]', '["a",[2]]', '{}', '{1=>2}', '{a=>"b"}', '{a=>[],b=>123}', '{a=>[],b=>"0123"}', '{a=>[],b=>"00"}', '{a=>[],b=>"1234567890"}', '{" foo"=>123}', "{\"z\\x{123}Z\"=>[\"a\\x{123}A\"]}"; is pond_write_datum(pond_read_datum($_), {indent=>0}), $_ foreach '""', '"abc"', '"a b"', "\"a'b\"", '"a\tb"', '"a\nb"', "\"a\\\\b\"", "\"a\\\"b\"", "\"a\\\$b\"", "\"a*b\"", "\"a\\x{e3}b\"", '"a\x00b"', '"a\x7fb"', '"a\x80b"', '"a\xa0b"', "\"a\\x{123}b\"", '123', '7', '0', '"0123"', '"00"', '"1234567890"', '[]', "[\n 1,\n]", "[\n \"a\",\n 2,\n]", "[\n \"a\",\n [\n 2,\n ],\n]", "{}", "{\n 1 => 2,\n}", "{\n a => \"b\",\n}", "{\n a => [],\n b => 123,\n}", "{\n a => [],\n b => \"0123\",\n}", "{\n a => [],\n b => \"00\",\n}", "{\n a => [],\n b => \"1234567890\",\n}", "{\n \" foo\" => 123,\n}", "{\n \"z\\x{123}Z\" => [\n \"a\\x{123}A\",\n ],\n}"; 1; Data-Pond-0.005/t/expr_pp.t000444001750001750 14613133614716 15450 0ustar00zeframzefram000000000000use warnings; use strict; do "./t/setup_pp.pl" or die $@ || $!; do "./t/expr.t" or die $@ || $!; 1; Data-Pond-0.005/t/pod_cvg.t000444001750001750 27313133614716 15415 0ustar00zeframzefram000000000000use warnings; use strict; use Test::More; plan skip_all => "Test::Pod::Coverage not available" unless eval "use Test::Pod::Coverage; 1"; Test::Pod::Coverage::all_pod_coverage_ok(); 1; Data-Pond-0.005/t/pod_cvg_pp.t000444001750001750 15113133614716 16107 0ustar00zeframzefram000000000000use warnings; use strict; do "./t/setup_pp.pl" or die $@ || $!; do "./t/pod_cvg.t" or die $@ || $!; 1; Data-Pond-0.005/t/pod_syn.t000444001750001750 23613133614716 15446 0ustar00zeframzefram000000000000use warnings; use strict; use Test::More; plan skip_all => "Test::Pod not available" unless eval "use Test::Pod 1.00; 1"; Test::Pod::all_pod_files_ok(); 1; Data-Pond-0.005/t/setup_pp.pl000444001750001750 31513133614716 16000 0ustar00zeframzefram000000000000require XSLoader; my $orig_load = \&XSLoader::load; no warnings "redefine"; *XSLoader::load = sub { die "XS loading disabled for Data::Pond" if ($_[0] || "") eq "Data::Pond"; goto &$orig_load; }; 1; Data-Pond-0.005/t/undef.t000444001750001750 64713133614716 15102 0ustar00zeframzefram000000000000use warnings; use strict; use Test::More tests => 7; BEGIN { use_ok "Data::Pond", qw(pond_write_datum); } foreach( undef, [ undef ], { a => undef }, ) { eval { pond_write_datum($_, {}); }; like $@, qr/\APond data error: /; } is pond_write_datum(undef, {undef_is_empty=>1}), '""'; is pond_write_datum([ undef ], {undef_is_empty=>1}), '[""]'; is pond_write_datum({ a => undef }, {undef_is_empty=>1}), '{a=>""}'; 1; Data-Pond-0.005/t/undef_pp.t000444001750001750 14713133614716 15574 0ustar00zeframzefram000000000000use warnings; use strict; do "./t/setup_pp.pl" or die $@ || $!; do "./t/undef.t" or die $@ || $!; 1;