debian/0000755000000000000000000000000012262427550007173 5ustar debian/ruby-xmlparser.docs0000644000000000000000000000002112262421624013026 0ustar README README.ja debian/copyright0000644000000000000000000000631212262421624011124 0ustar This package was debianized by akira yamada akira@debian.org on Sat, 22 Aug 1998 13:43:46 +0900. It was downloaded from Upstream Author: Yoshida Masato XPointer support is contributed by Masaki Fukushima encoding.h and the functions of encoding map are part of XML::Parser for Perl. Copyright (c) 1998 Larry Wall and Clark Cooper. All rights reserved. His program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Copyright: Ruby's Ruby's License: Ruby is copyrighted free software by Yukihiro Matsumoto . You can redistribute it and/or modify it under either the terms of the GPL (see the file GPL), or the conditions below: 1. You may make and give away verbatim copies of the source form of the software without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may modify your copy of the software in any way, provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or by allowing the author to include your modifications in the software. b) use the modified software only within your corporation or organization. c) give non-standard binaries non-standard names, with instructions on where to get the original software distribution. d) make other distribution arrangements with the author. 3. You may distribute the software in object code or binary form, provided that you do at least ONE of the following: a) distribute the binaries and library files of the software, together with instructions (in the manual page or equivalent) on where to get the original distribution. b) accompany the distribution with the machine-readable source of the software. c) give non-standard binaries non-standard names, with instructions on where to get the original software distribution. d) make other distribution arrangements with the author. 4. You may modify and include the part of the software into any other software (possibly commercial). But some files in the distribution are not written by the author, so that they are not under these terms. For the list of those files and their copying conditions, see the file LEGAL. 5. The scripts and library files supplied as input to or produced as output from the software do not automatically fall under the copyright of the software, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this software. 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. GNU General Public License: On Debian systems, the complete text of the GNU General Public License can be found in `/usr/share/common-licenses/GPL'. debian/rules0000755000000000000000000000104312262421624010245 0ustar #!/usr/bin/make -f #export DH_VERBOSE=1 # # Uncomment to ignore all test failures (but the tests will run anyway) #export DH_RUBY_IGNORE_TESTS=all # # Uncomment to ignore some test failures (but the tests will run anyway). # Valid values: #export DH_RUBY_IGNORE_TESTS=ruby1.8 ruby1.9.1 require-rubygems # # If you need to specify the .gemspec (eg there is more than one) #export DH_RUBY_GEMSPEC=gem.gemspec %: dh $@ --buildsystem=ruby --with ruby override_dh_auto_clean: dh_auto_clean # moved to ext/ rm -f encoding.h extconf.rb xmlparser.c debian/source/0000755000000000000000000000000012262421624010467 5ustar debian/source/format0000644000000000000000000000001412262421624011675 0ustar 3.0 (quilt) debian/control0000644000000000000000000000155312262427434010603 0ustar Source: ruby-xmlparser Section: ruby Priority: optional Maintainer: Debian Ruby Extras Maintainers Uploaders: Lucas Nussbaum Build-Depends: debhelper (>= 7.0.50~), gem2deb (>= 0.6.0~), libexpat1-dev (>= 1.95.1) Standards-Version: 3.9.5 Vcs-Git: git://anonscm.debian.org/pkg-ruby-extras/ruby-xmlparser.git Vcs-Browser: http://anonscm.debian.org/gitweb?p=pkg-ruby-extras/ruby-xmlparser.git;a=summary Homepage: http://www.yoshidam.net/Ruby.html XS-Ruby-Versions: all Package: ruby-xmlparser Architecture: any XB-Ruby-Versions: ${ruby:Versions} Depends: ${shlibs:Depends}, ${misc:Depends}, ruby | ruby-interpreter Description: Ruby interface for the expat XML parser toolkit This is the module to access to James Clark's XML Parser Toolkit "expat" from Ruby. debian/watch0000644000000000000000000000011112262421624010211 0ustar version=3 http://www.yoshidam.net/Ruby.html xmlparser-([\d.]+)\.tar\.gz debian/changelog0000644000000000000000000001500312262427471011046 0ustar ruby-xmlparser (0.7.2-3) unstable; urgency=medium * Team upload. [ Cédric Boutillier ] * debian/control: remove obsolete DM-Upload-Allowed flag * use canonical URI in Vcs-* fields [ Christian Hofstaedtler ] * Bump Standards-Version to 3.9.5 (no changes) * Remove transitional packages * Update Build-Depends for ruby2.0, drop ruby1.8 -- Christian Hofstaedtler Mon, 06 Jan 2014 05:20:55 +0100 ruby-xmlparser (0.7.2-2) unstable; urgency=low * Team upload. * Rebuild with newer gem2deb * Seems to conform to standards 3.9.3 * Move transitional packages to Priority: extra -- Vincent Fourmond Sun, 24 Jun 2012 01:03:06 +0200 ruby-xmlparser (0.7.2-1.1) unstable; urgency=low * Non-maintainer upload. * Fix "FTBFS: xmlparser.c:1783:2: error: format not a string literal and no format arguments [-Werror=format-security]": add patch xmlparser-ftbfs-fix.patch from Andreas Stührk (adds format strings). (Closes: #676194) -- gregor herrmann Fri, 22 Jun 2012 16:52:46 +0200 ruby-xmlparser (0.7.2-1) unstable; urgency=low * Switch to gem2deb-based packaging. -- Lucas Nussbaum Thu, 16 Jun 2011 21:57:51 +0200 libxml-parser-ruby (0.7.1-1.1) unstable; urgency=high * Non-maintainer upload. * debian/patches - add "add-digest.rb" to work it with ruby1.9.1 (Closes: #593082) -- Hideki Yamane Fri, 24 Dec 2010 22:59:07 +0900 libxml-parser-ruby (0.7.1-1) unstable; urgency=low * New upstream release * Bumped Standard version to 3.8.4 * debian/control + added myself to uploaders + ported to ruby1.9.1 + debhelper to >=7 + added dummy package + added ${misc:Depends} * debian/copyright + added author(s) information * Switch to dpkg-source 3.0 (quilt) format + added debian/source [ Gunnar Wolf ] * Changed section to Ruby as per ftp-masters' request -- Deepak Tripathi Sat, 12 Jun 2010 02:17:59 +0530 libxml-parser-ruby (0.6.8-4) unstable; urgency=low [ Vincent Fourmond ] * Added watchfile [ Filipe Lautert ] * Applied Barry deFreese patch to allow package to stick to pkg- ruby-extra standards and moving libuconv-ruby1.8 from suggests to depends field (closes: #454953, #396019). * Moved Debian Ruby Extras Maintainers to uploaders field and set myself as maintainer (closes: #472433). * Created patch to add return missing return statement in file lib/xml/saxdriver.rb:171 (closes: #421250). * Added diff to escape all > symbol in command strings to allow compatibility with broken XML parsers (closes: #403164). -- Filipe Lautert Fri, 27 Jun 2008 21:13:54 -0300 libxml-parser-ruby (0.6.8-3) unstable; urgency=low * New maintainer. * Added Vcs-*. -- Lucas Nussbaum Sat, 08 Dec 2007 13:50:06 +0100 libxml-parser-ruby (0.6.8-2) unstable; urgency=low * dropped ruby1.6 support. (closes: #367909, #368512) -- akira yamada Tue, 23 May 2006 14:31:16 +0900 libxml-parser-ruby (0.6.8-1) unstable; urgency=low * new upstream version. -- akira yamada Sat, 14 Aug 2004 19:31:23 +0900 libxml-parser-ruby (0.6.1-4) unstable; urgency=low * new sub-package libxml-parser-ruby1.6. - renamed from libxml-parser-ruby. * new sub-package libxml-parser-ruby1.8. -- akira yamada Wed, 10 Sep 2003 15:47:14 +0900 libxml-parser-ruby (0.6.1-3) unstable; urgency=low * rebuild with libexpat1 1.95.2-2.1. because version numbering of libexpat.so was changed. closes: #117470 -- akira yamada Mon, 29 Oct 2001 10:53:52 +0900 libxml-parser-ruby (0.6.1-2) unstable; urgency=low * --with-perl-enc-map=/usr/lib/perl5/XML/Parser/Encodings -- akira yamada Sat, 23 Jun 2001 19:13:59 +0900 libxml-parser-ruby (0.6.1-1) unstable; urgency=low * Upgraded to new upstream version. * Build with expat 1.95. * Build depends: libexpat1-dev. -- akira yamada Fri, 02 Feb 2001 23:36:50 +0900 libxml-parser-ruby (0.5.19-2) unstable; urgency=low * rebuild with ruby_1.6.2-5. -- akira yamada Thu, 25 Jan 2001 23:28:12 +0900 libxml-parser-ruby (0.5.19-1) unstable; urgency=low * Upgraded to new upstream version. * Build with ruby_1.6.0. * Updated Standards-Version to 3.2.1. * Added Build-Depends field into control file. -- akira yamada Tue, 5 Sep 2000 01:30:33 +0900 libxml-parser-ruby (0.5.16-2) unstable; urgency=low * Rebuild with ruby_1.4.4 -- akira yamada Tue, 4 Apr 2000 21:20:17 +0900 libxml-parser-ruby (0.5.16-1) unstable; urgency=low * Upgraded to new upstream version. * FHS complience. -- akira yamada Fri, 29 Oct 1999 15:01:48 +0900 libxml-parser-ruby (0.5.14-2) unstable; urgency=low * Build with ruby 1.4. -- akira yamada Wed, 18 Aug 1999 10:25:54 +0900 libxml-parser-ruby (0.5.14-1) unstable; urgency=low * Upgraded to new upstream version. -- akira yamada Tue, 6 Jul 1999 22:16:37 +0900 ruby-xmlparser-module (0.5.6-1) unstable; urgency=low * Upgraded to new upstream version. - class name aliasses are defined in C module. -- akira yamada Thu, 25 Feb 1999 17:02:58 +0900 ruby-xmlparser-module (0.5.4-2) unstable-jp; urgency=low * Build with ruby1.2_1.2.2. -- akira yamada Mon, 1 Feb 1999 14:58:21 +0900 ruby-xmlparser-module (0.5.4-1) unstable-jp; urgency=low * Upgraded to new upstream version. * Build with ruby_1.2.2. -- akira yamada Fri, 22 Jan 1999 18:19:15 +0900 ruby-xmlparser-module (0.4.17-1) unstable-jp; urgency=low * Upgraded to new upstream version. - mIDs are stored into static vars. - change ID attribute support of XPointer. - Node#trim is now xml:space-aware. - fix some bugs, add class name alias. -- akira yamada Sat, 31 Oct 1998 09:23:51 +0900 ruby-xmlparser-module (0.4.14-1) unstable-jp; urgency=low * Upgraded to new upstream version. -- akira yamada Tue, 27 Oct 1998 12:22:33 +0900 ruby-xmlparser-module (0.4.7-1) unstable-jp; urgency=low * Upgraded to new upstream version. -- akira yamada Sat, 3 Oct 1998 00:09:44 +0900 ruby-xmlparser-module (0.3.3-1) unstable-jp; urgency=low * Initial Release. -- akira yamada Sat, 22 Aug 1998 13:43:46 +0900 debian/compat0000644000000000000000000000000212262421624010365 0ustar 7 debian/patches/0000755000000000000000000000000012262421624010616 5ustar debian/patches/move-files-to-ext.patch0000644000000000000000000021260312262421624015127 0ustar Description: Move C files to ext/ This is required to avoid the double-installation of files in lib/ by extconf.rb (see #630641) Origin: vendor --- /dev/null +++ ruby-xmlparser-0.7.2/ext/encoding.h @@ -0,0 +1,91 @@ +/***************************************************************** +** encoding.h +** +** Copyright 1998 Clark Cooper +** All rights reserved. +** +** This program is free software; you can redistribute it and/or +** modify it under the same terms as Perl itself. +*/ + +#ifndef ENCODING_H +#define ENCODING_H 1 + +#define ENCMAP_MAGIC 0xfeebface + +typedef struct prefixmap { + unsigned char min; + unsigned char len; /* 0 => 256 */ + unsigned short bmap_start; + unsigned char ispfx[32]; + unsigned char ischar[32]; +} PrefixMap; + +typedef struct encinf +{ + unsigned short prefixes_size; + unsigned short bytemap_size; + int firstmap[256]; + PrefixMap *prefixes; + unsigned short *bytemap; +} Encinfo; + +typedef struct encmaphdr +{ + unsigned int magic; + char name[40]; + unsigned short pfsize; + unsigned short bmsize; + int map[256]; +} Encmap_Header; + +/*================================================================ +** Structure of Encoding map binary encoding +** +** Note that all shorts and ints are in network order, +** so when packing or unpacking with perl, use 'n' and 'N' respectively. +** In C, use the htonl family of functions. +** +** The basic structure is: +** +** _______________________ +** |Header (including map expat needs for 1st byte) +** |PrefixMap * pfsize +** | This section isn't included for single-byte encodings. +** | For multiple byte encodings, when a byte represents a prefix +** | then it indexes into this vector instead of mapping to a +** | Unicode character. The PrefixMap type is declared above. The +** | ispfx and ischar fields are bitvectors indicating whether +** | the byte being mapped is a prefix or character respectively. +** | If neither is set, then the character is not mapped to Unicode. +** | +** | The min field is the 1st byte mapped for this prefix; the +** | len field is the number of bytes mapped; and bmap_start is +** | the starting index of the map for this prefix in the overall +** | map (next section). +** |unsigned short * bmsize +** | This section also is omitted for single-byte encodings. +** | Each short is either a Unicode scalar or an index into the +** | PrefixMap vector. +** +** The header for these files is declared above as the Encmap_Header type. +** The magic field is a magic number which should match the ENCMAP_MAGIC +** macro above. The next 40 bytes stores IANA registered name for the +** encoding. The pfsize field holds the number of PrefixMaps, which should +** be zero for single byte encodings. The bmsize field holds the number of +** shorts used for the overall map. +** +** The map field contains either the Unicode scalar encoded by the 1st byte +** or -n where n is the number of bytes that such a 1st byte implies (Expat +** requires that the number of bytes to encode a character is indicated by +** the 1st byte) or -1 if the byte doesn't map to any Unicode character. +** +** If the encoding is a multiple byte encoding, then there will be PrefixMap +** and character map sections. The 1st PrefixMap (index 0), covers a range +** of bytes that includes all 1st byte prefixes. +** +** Look at convert_to_unicode in Expat.xs to see how this data structure +** is used. +*/ + +#endif /* ndef ENCODING_H */ --- /dev/null +++ ruby-xmlparser-0.7.2/ext/extconf.rb @@ -0,0 +1,58 @@ +# +# ruby extconf.rb +# --with-perl-enc-map[=/path/to/enc-map] +# --with-expat-dir=/path/to/expat +# --with-expat-lib=/path/to/expat/lib +# --with-expat-include=/path/to/expat/include +# +require 'mkmf' + +cwd=`pwd`.chomp! +perl= ENV['PERL'] || 'perl' + +## Encoding maps may be stored in $perl_archlib/XML/Parser/Encodins/ +#perl_archlib = '/usr/lib/perl5/site_perl/5.005/i586-linux' +#perl_archlib = '/usr/local/lib' +perl_archlib = `#{perl} -e 'use Config; print $Config{"archlib"}'` +xml_enc_path = with_config("perl-enc-map") +if xml_enc_path == true + xml_enc_path = perl_archlib + "/XML/Parser/Encodings" +end + +##$CFLAGS="-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok" + +## ' -DXML_ENC_PATH=getenv\(\"XML_ENC_PATH\"\)' + +## " -DNEW_EXPAT" +#$CFLAGS = "-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok" +#$LDFLAGS = "-L#{cwd}/expat/xmlparse -Wl,-rpath,/usr/local/lib" +#$LDFLAGS = "-L#{cwd}/expat/xmlparse" +dir_config("expat") +#dir_config("xmltok") +#dir_config("xmlparse") +if xml_enc_path + $CFLAGS += " -DXML_ENC_PATH=\\\"#{xml_enc_path}\\\"" +end + +#if have_header("xmlparse.h") || have_header("expat.h") +if have_header("expat.h") || have_header("xmlparse.h") + if have_library("expat", "XML_ParserCreate") || + have_library("xmltok", "XML_ParserCreate") + if have_func("XML_SetNotStandaloneHandler") + $CFLAGS += " -DNEW_EXPAT" + end + if have_func("XML_SetParamEntityParsing") + $CFLAGS += " -DXML_DTD" + end +# if have_func("XML_SetExternalParsedEntityDeclHandler") +# $CFLAGS += " -DEXPAT_1_2" +# end + have_func("XML_SetDoctypeDeclHandler") + have_func("XML_ParserReset") + have_func("XML_SetSkippedEntityHandler") + have_func("XML_GetFeatureList") + have_func("XML_UseForeignDTD") + have_func("XML_GetIdAttributeIndex") + have_library("socket", "ntohl") + have_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/ + create_makefile("xmlparser") + end +end --- /dev/null +++ ruby-xmlparser-0.7.2/ext/xmlparser.c @@ -0,0 +1,2292 @@ +/* + * Expat (XML Parser Toolkit) wrapper for Ruby + * Dec 15, 2009 yoshidam version 0.7.0 support Ruby 1.9.1 + * Feb 16, 2004 yoshidam version 0.6.8 taint output string + * Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow + * Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler + * Sep 20, 2002 yoshidam version 0.6.5 fix reset method + * Apr 4, 2002 yoshidam version 0.6.3 change event code values + * Oct 10, 2000 yoshidam version 0.6.1 support expat-1.2 + * Oct 6, 2000 yoshidam version 0.6.0 support expat-1.95.0 + * Jun 28, 1999 yoshidam version 0.5.18 define initialize for Ruby 1.5 + * Jun 28, 1999 yoshidam version 0.5.15 support start/endDoctypeDecl + * Jun 28, 1999 yoshidam version 0.5.14 support setParamEntityParsing + * Apr 28, 1999 yoshidam version 0.5.11 support notStandalone + * Mar 29, 1998 yoshidam version 0.5.9 optimize for Ruby 1.3 + * Mar 8, 1998 yoshidam version 0.5.7 support start/endNamespaceDecl + * Jan 14, 1998 yoshidam version 0.5.4 support start/endCdataSection + * Jan 10, 1998 yoshidam version 0.5.3 support encoding map + * Nov 24, 1998 yoshidam version 0.5.0 support TEST version of expat + * Nov 5, 1998 yoshidam version 0.4.18 mIDs are initialized in Init_xmlparser + * Oct 28, 1998 yoshidam version 0.4.17 mIDs are stored into static vars + * Oct 13, 1998 yoshidam version 0.4.12 debug and speed up myEncodingConv + * Oct 7, 1998 yoshidam version 0.4.11 hold internal object into ivar + * Sep 18, 1998 yoshidam version 0.4.6 + * Sep 8, 1998 yoshidam version 0.4.4 + * Sep 3, 1998 yoshidam version 0.4.3 + * Sep 1, 1998 yoshidam version 0.4.2 + * Aug 28, 1998 yoshidam version 0.4.1 + * Aug 22, 1998 yoshidam version 0.4.0 + * Jul 6, 1998 yoshidam version 0.2 + * Jun 30, 1998 yoshidam version 0.1 + * + * XML_ENC_PATH: path of encoding map for Perl + * HAVE_XML_USEFOREIGNDTD: expat 1.95.5 + * HAVE_XML_GETFEATURELIST: expat 1.95.5 + * HAVE_XML_SETSKIPPEDENTITYHANDLER: expat 1.95.4 + * HAVE_XML_PARSERRESET: expat 1.95.3 + * HAVE_EXPAT_H: expat 1.95.0 + * HAVE_XML_SETDOCTYPEDECLHANDLER: expat 19990728 + * XML_DTD: expat 19990626 + * NEW_EXPAT: expat 1.1 + */ + +#include "ruby.h" +#ifdef HAVE_RUBY_IO_H +# include "ruby/io.h" +#else +# include "rubyio.h" +#endif +#include +#include +#ifdef HAVE_EXPAT_H +# include "expat.h" +#else +# include "xmlparse.h" +#endif +#ifdef XML_ENC_PATH +# include +# include +# include "encoding.h" +# ifndef PATH_MAX +# define PATH_MAX 256 +# endif +#endif + +#ifndef RSTRING_PTR +# define RSTRING_PTR(s) (RSTRING(s)->ptr) +# define RSTRING_LEN(s) (RSTRING(s)->len) +#endif + +#ifdef HAVE_RUBY_ENCODING_H +static rb_encoding* enc_xml; +#endif + +static VALUE eXMLParserError; +static VALUE cXMLParser; +static VALUE cXMLEncoding; +static ID id_map; +static ID id_startElementHandler; +static ID id_endElementHandler; +static ID id_characterDataHandler; +static ID id_processingInstructionHandler; +static ID id_defaultHandler; +static ID id_defaultExpandHandler; +static ID id_unparsedEntityDeclHandler; +static ID id_notationDeclHandler; +static ID id_externalEntityRefHandler; +static ID id_unknownEncoding; +static ID id_convert; +#ifdef NEW_EXPAT +static ID id_commentHandler; +static ID id_startCdataSectionHandler; +static ID id_endCdataSectionHandler; +static ID id_startNamespaceDeclHandler; +static ID id_endNamespaceDeclHandler; +static ID id_notStandaloneHandler; +#endif +#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER +static ID id_startDoctypeDeclHandler; +static ID id_endDoctypeDeclHandler; +#endif +#ifdef HAVE_EXPAT_H +static ID id_elementDeclHandler; +static ID id_attlistDeclHandler; +static ID id_xmlDeclHandler; +static ID id_entityDeclHandler; +#endif +#if 0 +static ID id_externalParsedEntityDeclHandler; +static ID id_internalParsedEntityDeclHandler; +#endif +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER +static ID id_skippedEntityHandler; +#endif + +#define GET_PARSER(obj, parser) \ + Data_Get_Struct(obj, XMLParser, parser) + +typedef struct _XMLParser { + XML_Parser parser; + int iterator; + int defaultCurrent; +#ifdef NEW_EXPAT + const XML_Char** lastAttrs; +#endif + int tainted; + VALUE parent; + char* context; + const XML_Char *detectedEncoding; +} XMLParser; + +static VALUE symDEFAULT; +static VALUE symSTART_ELEM; +static VALUE symEND_ELEM; +static VALUE symCDATA; +static VALUE symPI; +static VALUE symUNPARSED_ENTITY_DECL; +static VALUE symNOTATION_DECL; +static VALUE symEXTERNAL_ENTITY_REF; +#ifdef NEW_EXPAT +static VALUE symCOMMENT; +static VALUE symSTART_CDATA; +static VALUE symEND_CDATA; +static VALUE symSTART_NAMESPACE_DECL; +static VALUE symEND_NAMESPACE_DECL; +#endif +#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER +static VALUE symSTART_DOCTYPE_DECL; +static VALUE symEND_DOCTYPE_DECL; +#endif +#ifdef HAVE_EXPAT_H +static VALUE symELEMENT_DECL; +static VALUE symATTLIST_DECL; +static VALUE symXML_DECL; +static VALUE symENTITY_DECL; +#endif +#if 0 +static VALUE symEXTERNAL_PARSED_ENTITY_DECL; +static VALUE symINTERNAL_PARSED_ENTITY_DECL; +#endif +#if 0 +static VALUE symUNKNOWN_ENCODING; +#endif +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER +static VALUE symSKIPPED_ENTITY; +#endif + +/* destructor */ +static void +XMLParser_free(XMLParser* parser) +{ + /* fprintf(stderr, "Delete XMLParser: %p->%p\n", parser, parser->parser);*/ + if (parser->parser) { + XML_ParserFree(parser->parser); + parser->parser = NULL; + } + free(parser); +} + +static void +XMLParser_mark(XMLParser* parser) +{ + /* fprintf(stderr, "Mark XMLParser: %p->%p\n", parser, parser->parser);*/ + if (!NIL_P(parser->parent)) { + XMLParser* parent; + GET_PARSER(parser->parent, parent); + rb_gc_mark(parser->parent); + } +} + +static void +taintParser(XMLParser* parser) { + parser->tainted |= 1; + if (!NIL_P(parser->parent) && !parser->context) { + XMLParser* parent; + GET_PARSER(parser->parent, parent); + taintParser(parent); + } +} + +inline static VALUE +taintObject(XMLParser* parser, VALUE obj) { + if (parser->tainted) + OBJ_TAINT(obj); + return obj; +} +#define TO_(o) (taintObject(parser, o)) + +inline static VALUE +freezeObject(VALUE obj) { + OBJ_FREEZE(obj); + return obj; +} +#define FO_(o) (freezeObject(o)) + +#ifdef HAVE_RUBY_ENCODING_H +# define ENC_(o) (rb_enc_associate(o, enc_xml)) +#else +# define ENC_(o) (o) +#endif + + +/* Event handlers for iterator */ +static void +iterStartElementHandler(void *recv, + const XML_Char *name, const XML_Char **atts) +{ + XMLParser* parser; + VALUE attrhash; + + GET_PARSER(recv, parser); +#ifdef NEW_EXPAT + parser->lastAttrs = atts; +#endif + attrhash = rb_hash_new(); + while (*atts) { + const char* key = *atts++; + const char* val = *atts++; + rb_hash_aset(attrhash, + FO_(TO_(ENC_(rb_str_new2((char*)key)))), + TO_(ENC_(rb_str_new2((char*)val)))); + } + + rb_yield(rb_ary_new3(4, symSTART_ELEM, + TO_(ENC_(rb_str_new2((char*)name))), attrhash, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterEndElementHandler(void *recv, + const XML_Char *name) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symEND_ELEM, + TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterCharacterDataHandler(void *recv, + const XML_Char *s, + int len) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symCDATA, + Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterProcessingInstructionHandler(void *recv, + const XML_Char *target, + const XML_Char *data) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symPI, + TO_(ENC_(rb_str_new2((char*)target))), + TO_(ENC_(rb_str_new2((char*)data))), recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterDefaultHandler(void *recv, + const XML_Char *s, + int len) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symDEFAULT, + Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + /* XML_DefaultCurrent shoould not call in defaultHandler */ + /* XML_DefaultCurrent(parser->parser); */ + } +} + +void +iterUnparsedEntityDeclHandler(void *recv, + const XML_Char *entityName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + XMLParser* parser; + VALUE valary; + + GET_PARSER(recv, parser); + valary = rb_ary_new3(4, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + TO_(ENC_(rb_str_new2((char*)systemId))), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil), + TO_(ENC_(rb_str_new2((char*)notationName)))); + rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL, + TO_(ENC_(rb_str_new2((char*)entityName))), + valary, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +void +iterNotationDeclHandler(void *recv, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + XMLParser* parser; + VALUE valary; + + GET_PARSER(recv, parser); + valary = rb_ary_new3(3, + (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil)); + rb_yield(rb_ary_new3(4, symNOTATION_DECL, + TO_(ENC_(rb_str_new2((char*)notationName))), + valary, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +int +iterExternalEntityRefHandler(XML_Parser xmlparser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + XMLParser* parser; + VALUE recv; + VALUE valary; + VALUE ret; + + recv = (VALUE)XML_GetUserData(xmlparser); + GET_PARSER(recv, parser); + valary = rb_ary_new3(3, + (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil)); + ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF, + (context ? TO_(ENC_(rb_str_new2((char*)context))) : Qnil), + valary, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } + /* The error status in this iterator block should be returned + by the exception. */ + return 1; +} + +#ifdef NEW_EXPAT +static void +iterCommentHandler(void *recv, + const XML_Char *s) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symCOMMENT, + Qnil, TO_(ENC_(rb_str_new2((char*)s))), recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterStartCdataSectionHandler(void *recv) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symSTART_CDATA, Qnil, Qnil, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterEndCdataSectionHandler(void *recv) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symEND_CDATA, Qnil, Qnil, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterStartNamespaceDeclHandler(void *recv, + const XML_Char *prefix, + const XML_Char *uri) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL, + (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil), + (uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil), recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterEndNamespaceDeclHandler(void *recv, + const XML_Char *prefix) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL, + (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil), + Qnil, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} +#endif + +#ifdef HAVE_XML_SETPARAMENTITYPARSING +static void +#ifdef HAVE_EXPAT_H +iterStartDoctypeDeclHandler(void *recv, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +#else +iterStartDoctypeDeclHandler(void *recv, + const XML_Char *doctypeName) +#endif +{ + XMLParser* parser; + VALUE valary = Qnil; + + GET_PARSER(recv, parser); +#ifdef HAVE_EXPAT_H + valary = rb_ary_new3(3, + (sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil), + (pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil), + (has_internal_subset ? Qtrue : Qfalse)); +#endif + rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL, + TO_(ENC_(rb_str_new2((char*)doctypeName))), + valary, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterEndDoctypeDeclHandler(void *recv) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symEND_DOCTYPE_DECL, + Qnil, + Qnil, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} +#endif + + +#ifdef HAVE_EXPAT_H + +static VALUE +makeContentArray(XMLParser* parser, XML_Content* model) +{ + static const char* content_type_name[] = { + NULL, "EMPTY", "ANY", "MIXED", "NAME", "CHOICE", "SEQ" + }; + static const char* content_quant_name[] = { + "", "?", "*", "+" + }; + unsigned int i; + VALUE children = Qnil; + const char* type_name = content_type_name[model->type]; + const char* quant_name = content_quant_name[model->quant]; + VALUE ret = rb_ary_new3(3, + TO_(ENC_(rb_str_new2((char*)type_name))), + TO_(ENC_(rb_str_new2((char*)quant_name))), + (model->name ? TO_(ENC_(rb_str_new2((char*)model->name))) : + Qnil)); + if (model->numchildren > 0) { + children = rb_ary_new(); + for (i = 0; i < model->numchildren; i++) { + VALUE child = makeContentArray(parser, model->children + i); + rb_ary_push(children, child); + } + } + rb_ary_push(ret, children); + return ret; +} + + + +static void +iterElementDeclHandler(void *recv, + const XML_Char *name, + XML_Content *model) +{ + XMLParser* parser; + VALUE content; + GET_PARSER(recv, parser); + content = makeContentArray(parser, model); + rb_yield(rb_ary_new3(4, symELEMENT_DECL, + TO_(ENC_(rb_str_new2(name))), + content, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterAttlistDeclHandler (void *recv, + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired) +{ + XMLParser* parser; + VALUE valary; + + GET_PARSER(recv, parser); + valary = rb_ary_new3(4, + TO_(ENC_(rb_str_new2((char*)attname))), + TO_(ENC_(rb_str_new2((char*)att_type))), + (dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil), + (isrequired ? Qtrue : Qfalse)); + rb_yield(rb_ary_new3(4, symATTLIST_DECL, + TO_(ENC_(rb_str_new2(elname))), + valary, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterXmlDeclHandler (void *recv, + const XML_Char *version, + const XML_Char *encoding, + int standalone) +{ + XMLParser* parser; + VALUE valary; + + GET_PARSER(recv, parser); + valary = rb_ary_new3(3, + (version ? TO_(ENC_(rb_str_new2(version))) : Qnil), + (encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil), + INT2FIX(standalone)); + rb_yield(rb_ary_new3(4, symXML_DECL, + Qnil, + valary, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterEntityDeclHandler (void *recv, + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + XMLParser* parser; + VALUE valary; + + GET_PARSER(recv, parser); + valary = rb_ary_new3(6, + (is_parameter_entity ? Qtrue : Qfalse), + TO_(ENC_(rb_str_new((char*)value, value_length))), + (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil), + (notationName ? TO_(ENC_(rb_str_new2((char*)notationName))) + : Qnil)); + rb_yield(rb_ary_new3(4, symENTITY_DECL, + TO_(ENC_(rb_str_new2(entityName))), + valary, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +#endif + +#if 0 +static void +iterExternalParsedEntityDeclHandler(void *recv, + const XML_Char *entityName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + XMLParser* parser; + VALUE valary; + + GET_PARSER(recv, parser); + valary = rb_ary_new3(3, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + TO_(ENC_(rb_str_new2((char*)systemId))), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil)); + rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL, + TO_(ENC_(rb_str_new2((char*)entityName))), + valary, recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} + +static void +iterInternalParsedEntityDeclHandler(void *recv, + const XML_Char *entityName, + const XML_Char *replacementText, + int replacementTextLength) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL, + TO_(ENC_(rb_str_new2((char*)entityName))), + TO_(ENC_(rb_str_new((char*)replacementText, + replacementTextLength))), recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} +#endif + +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER +static void +iterSkippedEntityHandler(void *recv, + const XML_Char *entityName, + int is_parameter_entity) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY, + TO_(ENC_(rb_str_new2((char*)entityName))), + INT2FIX(is_parameter_entity), recv)); + if (parser->defaultCurrent) { + parser->defaultCurrent = 0; + XML_DefaultCurrent(parser->parser); + } +} +#endif + + + +/* Event handlers for instance method */ +static void +myStartElementHandler(void *recv, + const XML_Char *name, const XML_Char **atts) +{ + XMLParser* parser; + VALUE attrhash; + + GET_PARSER(recv, parser); +#ifdef NEW_EXPAT + parser->lastAttrs = atts; +#endif + attrhash = rb_hash_new(); + while (*atts) { + const char* key = *atts++; + const char* val = *atts++; + rb_hash_aset(attrhash, + FO_(TO_(ENC_(rb_str_new2((char*)key)))), + TO_(ENC_(rb_str_new2((char*)val)))); + } + rb_funcall((VALUE)recv, id_startElementHandler, 2, + TO_(ENC_(rb_str_new2((char*)name))), attrhash); +} + +static void +myEndElementHandler(void *recv, + const XML_Char *name) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_endElementHandler, 1, + TO_(ENC_(rb_str_new2((char*)name)))); +} + +static void +myCharacterDataHandler(void *recv, + const XML_Char *s, + int len) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_characterDataHandler, 1, + TO_(ENC_(rb_str_new((char*)s, len)))); +} + +static void +myProcessingInstructionHandler(void *recv, + const XML_Char *target, + const XML_Char *data) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_processingInstructionHandler, 2, + TO_(ENC_(rb_str_new2((char*)target))), + TO_(ENC_(rb_str_new2((char*)data)))); +} + +static void +myDefaultHandler(void *recv, + const XML_Char *s, + int len) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_defaultHandler, 1, + TO_(ENC_(rb_str_new((char*)s, len)))); +} + +#ifdef NEW_EXPAT +static void +myDefaultExpandHandler(void *recv, + const XML_Char *s, + int len) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_defaultExpandHandler, 1, + TO_(ENC_(rb_str_new((char*)s, len)))); +} +#endif + +void +myUnparsedEntityDeclHandler(void *recv, + const XML_Char *entityName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5, + TO_(ENC_(rb_str_new2((char*)entityName))), + (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + TO_(ENC_(rb_str_new2((char*)systemId))), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil), + TO_(ENC_(rb_str_new2((char*)notationName)))); +} + +void +myNotationDeclHandler(void *recv, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_notationDeclHandler, 4, + TO_(ENC_(rb_str_new2((char*)notationName))), + (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil)); +} + +int +myExternalEntityRefHandler(XML_Parser xmlparser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + XMLParser* parser; + VALUE recv; + VALUE ret; + + recv = (VALUE)XML_GetUserData(xmlparser); + GET_PARSER(recv, parser); + ret = rb_funcall(recv, id_externalEntityRefHandler, 4, + (context ? TO_(ENC_(rb_str_new2((char*)context))): Qnil), + (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil)); + /* The error status in this handler should be returned + by the exception. */ + return Qnil; +} + +#ifdef NEW_EXPAT +static void +myCommentHandler(void *recv, + const XML_Char *s) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_commentHandler, 1, + TO_(ENC_(rb_str_new2((char*)s)))); +} + +static void +myStartCdataSectionHandler(void *recv) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_startCdataSectionHandler, 0); +} + +static void +myEndCdataSectionHandler(void *recv) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_endCdataSectionHandler, 0); +} + +static void +myStartNamespaceDeclHandler(void *recv, + const XML_Char *prefix, + const XML_Char *uri) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2, + (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil), + (uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil)); +} + +static void +myEndNamespaceDeclHandler(void *recv, + const XML_Char *prefix) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1, + (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil)); +} + +static int +myNotStandaloneHandler(void *recv) +{ + XMLParser* parser; + VALUE v; + + GET_PARSER(recv, parser); + v = rb_funcall((VALUE)recv, id_notStandaloneHandler, 0); + Check_Type(v, T_FIXNUM); + return FIX2INT(v); +} +#endif + +#ifdef HAVE_XML_SETPARAMENTITYPARSING +static void +#ifdef HAVE_EXPAT_H +myStartDoctypeDeclHandler(void *recv, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +#else +myStartDoctypeDeclHandler(void *recv, + const XML_Char *doctypeName) +#endif +{ + XMLParser* parser; + GET_PARSER(recv, parser); +#ifdef HAVE_EXPAT_H + rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4, + TO_(ENC_(rb_str_new2((char*)doctypeName))), + (sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil), + (pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil), + (has_internal_subset ? Qtrue : Qfalse)); +#else + rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4, + TO_(ENC_(rb_str_new2((char*)doctypeName))), + Qnil, Qnil, Qfalse); +#endif +} + +static void +myEndDoctypeDeclHandler(void *recv) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_endDoctypeDeclHandler, 0); +} +#endif + + +#ifdef HAVE_EXPAT_H + +static void +myElementDeclHandler(void *recv, + const XML_Char *name, + XML_Content *model) +{ + XMLParser* parser; + VALUE content; + GET_PARSER(recv, parser); + content = makeContentArray(parser, model); + rb_funcall((VALUE)recv, id_elementDeclHandler, 2, + TO_(ENC_(rb_str_new2(name))), content); +} + +static void +myAttlistDeclHandler (void *recv, + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_attlistDeclHandler, 5, + TO_(ENC_(rb_str_new2(elname))), + TO_(ENC_(rb_str_new2((char*)attname))), + TO_(ENC_(rb_str_new2((char*)att_type))), + (dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil), + (isrequired ? Qtrue : Qfalse)); +} + +static void +myXmlDeclHandler (void *recv, + const XML_Char *version, + const XML_Char *encoding, + int standalone) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_xmlDeclHandler, 3, + (version ? TO_(ENC_(rb_str_new2(version))) : Qnil), + (encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil), + INT2FIX(standalone)); +} + +static void +myEntityDeclHandler (void *recv, + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_entityDeclHandler, 7, + TO_(ENC_(rb_str_new2(entityName))), + (is_parameter_entity ? Qtrue : Qfalse), + TO_(ENC_(rb_str_new((char*)value, value_length))), + (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil), + (notationName ? TO_(ENC_(rb_str_new2((char*)notationName))) + : Qnil)); +} + +#endif + +#if 0 +static void +myExternalParsedEntityDeclHandler(void *recv, + const XML_Char *entityName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4, + TO_(ENC_(rb_str_new2((char*)entityName))), + (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil), + TO_(ENC_(rb_str_new2((char*)systemId))), + (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil)); +} + +static void +myInternalParsedEntityDeclHandler(void *recv, + const XML_Char *entityName, + const XML_Char *replacementText, + int replacementTextLength) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2, + TO_(ENC_(rb_str_new2((char*)entityName))), + TO_(ENC_(rb_str_new((char*)replacementText, + replacementTextLength)))); +} +#endif + + +static VALUE +XMLEncoding_map(VALUE obj, VALUE i) +{ + return i; +} + +static VALUE +XMLEncoding_convert(VALUE obj, VALUE str) +{ + return INT2FIX('?'); +} + +static int +myEncodingConv(void *data, const char *s) +{ + VALUE v; + int len; + int slen = RSTRING_PTR(rb_ivar_get((VALUE)data, + id_map))[*(unsigned char*)s]; + + v = rb_funcall((VALUE)data, id_convert, 1, ENC_(rb_str_new((char*)s, -slen))); + switch (TYPE(v)) { + case T_FIXNUM: + return FIX2INT(v); + case T_STRING: + len = RSTRING_LEN(v); + if (len == 1) { + return (unsigned char)*RSTRING_PTR(v); + } + else if (len >= 2) { + return (unsigned char)*RSTRING_PTR(v) | + (unsigned char)*(RSTRING_PTR(v) + 1) << 8; + } + } + return 0; +} + +#if 0 +static int +iterUnknownEncodingHandler(void *recv, + const XML_Char *name, + XML_Encoding *info) +{ + XMLParser* parser; + VALUE ret; + + if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0)) + return 0; + + GET_PARSER(recv, parser); + ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING, + TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv)); + if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) { + int i; + ID mid = rb_intern("map"); + VALUE cmap = rb_str_new(NULL, 256); + rb_ivar_set(ret, id_map, cmap); + + for (i = 0; i < 256; i++) { + VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i)); + RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m); + } + /* protect object form GC */ + rb_ivar_set(recv, rb_intern("_encoding"), ret); + info->data = (void*)ret; + info->convert = myEncodingConv; + return 1; + } + + return 0; +} +#endif + +#ifdef XML_ENC_PATH +/* + * Encoding map functions come from XML::Parser Version 2.19 + * + * Copyright 1998 Larry Wall and Clark Cooper + * All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the same terms as Perl itself. + */ +static Encinfo* +getEncinfo(char* data, int size) +{ + Encmap_Header* header = (Encmap_Header*)data; + unsigned short prefixes_size; + unsigned short bytemap_size; + Encinfo* ret; + int i; + PrefixMap* prefixes; + unsigned short *bytemap; + + if (size < sizeof(Encmap_Header) || ntohl(header->magic) != ENCMAP_MAGIC) + return NULL; + prefixes_size = ntohs(header->pfsize); + bytemap_size = ntohs(header->bmsize); + if (size != (sizeof(Encmap_Header) + + prefixes_size * sizeof(PrefixMap) + + bytemap_size * sizeof(unsigned short))) + return NULL; + if ((ret = (Encinfo*)malloc(sizeof(Encinfo))) == NULL) { + return NULL; + } + ret->prefixes_size = prefixes_size; + ret->bytemap_size = bytemap_size; + for (i = 0; i < 256; i++) + ret->firstmap[i] = ntohl(header->map[i]); + prefixes = (PrefixMap*)(data + sizeof(Encmap_Header)); + bytemap = (unsigned short*)(data + sizeof(Encmap_Header) + + sizeof(PrefixMap)*prefixes_size); + if ((ret->prefixes = + (PrefixMap*)malloc(sizeof(PrefixMap)*prefixes_size)) == NULL) { + free(ret); + return NULL; + } + if ((ret->bytemap = + (unsigned short*)malloc(sizeof(unsigned short)*bytemap_size)) == NULL) { + free(ret->prefixes); + free(ret); + return NULL; + } + for (i = 0; i < prefixes_size; i++, prefixes++) { + ret->prefixes[i].min = prefixes->min; + ret->prefixes[i].len = prefixes->len; + ret->prefixes[i].bmap_start = ntohs(prefixes->bmap_start); + memcpy(ret->prefixes[i].ispfx, prefixes->ispfx, + sizeof(prefixes->ispfx) + sizeof(prefixes->ischar)); + } + for (i = 0; i < bytemap_size; i++) + ret->bytemap[i] = ntohs(bytemap[i]); + + return ret; +} + +static int +convertEncoding(Encinfo* enc, const char* seq) +{ + PrefixMap* curpfx; + int count; + int index = 0; + + for (count = 0; count < 4; count++) { + unsigned char byte = (unsigned char)seq[count]; + unsigned char bndx; + unsigned char bmsk; + int offset; + + curpfx = &enc->prefixes[index]; + offset = ((int)byte) - curpfx->min; + if (offset < 0) + break; + if (offset >= curpfx->len && curpfx->len != 0) + break; + + bndx = byte >> 3; + bmsk = 1 << (byte & 0x7); + + if (curpfx->ispfx[bndx] & bmsk) { + index = enc->bytemap[curpfx->bmap_start + offset]; + } + else if (curpfx->ischar[bndx] & bmsk) { + return enc->bytemap[curpfx->bmap_start + offset]; + } + else + break; + } + + return -1; +} + +static void +releaseEncoding(Encinfo* enc) +{ + if (enc) { + if (enc->prefixes) + free(enc->prefixes); + if (enc->bytemap) + free(enc->bytemap); + free(enc); + } +} + +static Encinfo* +findEncoding(const char* encname) +{ + FILE* fp; + Encinfo* enc; + struct stat st; + int size; + int len; + char file[PATH_MAX] = "\0"; + const char* p; + char* buf; +#ifdef DOSISH + const char sepchar = '\\'; +#else + const char sepchar = '/'; +#endif + const char* const encext = ".enc"; + + rb_secure(2); + /* make map file path */ + if (XML_ENC_PATH != NULL) { + strncpy(file, XML_ENC_PATH, PATH_MAX - 1); + file[PATH_MAX - 1] = '\0'; + } + len = strlen(file); + if (len > 0 && len < PATH_MAX - 1 && file[len - 1] != sepchar) + file[len++] = sepchar; + for (p = encname; *p && len < PATH_MAX - 1; p++, len++) { + file[len] = tolower(*p); + } + file[len] = '\0'; + strncat(file, encext, PATH_MAX - len -1); + + if ((fp = fopen(file, "rb")) == NULL) { + return NULL; + } + + /* get file length */ + fstat(fileno(fp), &st); + size = st.st_size; + + if ((buf = (char*)malloc(size)) == NULL) { + fclose(fp); + return NULL; + } + + fread(buf, 1, size, fp); + fclose(fp); + enc = getEncinfo(buf, size); + free(buf); + return enc; +} + +#endif + +static int +myUnknownEncodingHandler(void *recv, + const XML_Char *name, + XML_Encoding *info) +{ + XMLParser* parser; + VALUE ret; + + GET_PARSER(recv, parser); + parser->detectedEncoding = name; + + if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0)) +#ifndef XML_ENC_PATH + return 0; +#else + { + Encinfo* enc; + + if ((enc = findEncoding(name)) != NULL) { + memcpy(info->map, enc->firstmap, sizeof(int)*256); + info->data = enc; + info->convert = (int(*)(void*,const char*))convertEncoding; + info->release = (void(*)(void*))releaseEncoding; + return 1; + } + else + return 0; + } +#endif + + ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1, + TO_(ENC_(rb_str_new2((char*)name)))); + if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) { + int i; + ID mid = rb_intern("map"); + VALUE cmap = rb_str_new(NULL, 256); + rb_ivar_set(ret, id_map, cmap); + + if (OBJ_TAINTED(ret)) + taintParser(parser); + TO_(cmap); + + for (i = 0; i < 256; i++) { + VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i)); + RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m); + } + /* protect object form GC */ + rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret); + info->data = (void*)ret; + info->convert = myEncodingConv; + + return 1; + } + + return 0; +} + +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER +static void +mySkippedEntityHandler(void *recv, + const XML_Char *entityName, + int is_parameter_entity) +{ + XMLParser* parser; + GET_PARSER(recv, parser); + rb_funcall((VALUE)recv, id_skippedEntityHandler, 2, + TO_(ENC_(rb_str_new2((char*)entityName))), + INT2FIX(is_parameter_entity)); +} +#endif + + +/* constructor */ +static VALUE +XMLParser_new(int argc, VALUE* argv, VALUE klass) +{ + XMLParser* parser; + VALUE obj; + VALUE arg1; + VALUE arg2; + VALUE arg3; + int count; + char* encoding = NULL; +#ifdef NEW_EXPAT + char* nssep = NULL; +#endif + char* context = NULL; + XMLParser* rootparser = NULL; + VALUE parent = Qnil; + + count = rb_scan_args(argc, argv, "03", &arg1, &arg2, &arg3); + if (count == 1) { + /* new(encoding) */ + if (TYPE(arg1) != T_NIL) { + Check_Type(arg1, T_STRING); /* encoding */ + encoding = RSTRING_PTR(arg1); + } + } + else if (count == 2) { + /* new(encoding, nschar) */ + /* new(parser, context) */ +#ifdef NEW_EXPAT + if (TYPE(arg1) != T_DATA) { + if (TYPE(arg1) != T_NIL) { + Check_Type(arg1, T_STRING); /* encoding */ + encoding = RSTRING_PTR(arg1); + } + Check_Type(arg2, T_STRING); /* nschar */ + nssep = RSTRING_PTR(arg2); + } + else { +#endif + Check_Type(arg1, T_DATA); /* parser */ + GET_PARSER(arg1, rootparser); + if (!NIL_P(arg2)) { + Check_Type(arg2, T_STRING); /* context */ + context = RSTRING_PTR(arg2); + } + parent = arg1; +#ifdef NEW_EXPAT + } +#endif + } + else if (count == 3) { + /* new(parser, context, encoding) */ + Check_Type(arg1, T_DATA); /* parser */ + GET_PARSER(arg1, rootparser); + if (!NIL_P(arg2)) { + Check_Type(arg2, T_STRING); /* context */ + context = RSTRING_PTR(arg2); + } + Check_Type(arg3, T_STRING); /* encoding */ + encoding = RSTRING_PTR(arg3); + parent = arg1; + } + + /* create object */ + obj = Data_Make_Struct(klass, XMLParser, + XMLParser_mark, XMLParser_free, parser); + /* create parser */ + if (rootparser == NULL) { +#ifdef NEW_EXPAT + if (nssep == NULL) + parser->parser = XML_ParserCreate(encoding); + else + parser->parser = XML_ParserCreateNS(encoding, nssep[0]); +#else + parser->parser = XML_ParserCreate(encoding); +#endif + parser->tainted = 0; + parser->context = NULL; + } + else { + parser->parser = XML_ExternalEntityParserCreate(rootparser->parser, + context, encoding); + /* clear all inhrited handlers, + because handlers should be set in "parse" method */ + XML_SetElementHandler(parser->parser, NULL, NULL); + XML_SetCharacterDataHandler(parser->parser, NULL); + XML_SetProcessingInstructionHandler(parser->parser, NULL); + XML_SetDefaultHandler(parser->parser, NULL); + XML_SetUnparsedEntityDeclHandler(parser->parser, NULL); + XML_SetNotationDeclHandler(parser->parser, NULL); + XML_SetExternalEntityRefHandler(parser->parser, NULL); +#ifdef NEW_EXPAT + XML_SetCommentHandler(parser->parser, NULL); + XML_SetCdataSectionHandler(parser->parser, NULL, NULL); + XML_SetNamespaceDeclHandler(parser->parser, NULL, NULL); + XML_SetNotStandaloneHandler(parser->parser, NULL); +#endif +#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER + XML_SetDoctypeDeclHandler(parser->parser, NULL, NULL); +#endif +#ifdef HAVE_EXPAT_H + XML_SetElementDeclHandler(parser->parser, NULL); + XML_SetAttlistDeclHandler(parser->parser, NULL); + XML_SetXmlDeclHandler(parser->parser, NULL); + XML_SetEntityDeclHandler(parser->parser, NULL); +#endif +#if 0 + XML_SetExternalParsedEntityDeclHandler(parser->parser, NULL); + XML_SetInternalParsedEntityDeclHandler(parser->parser, NULL); +#endif +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER + XML_SetSkippedEntityHandler(parser->parser, NULL); +#endif + if (rootparser->tainted) + parser->tainted |= 1; + parser->context = context; + } + if (!parser->parser) + rb_raise(eXMLParserError, "cannot create parser"); + + /* setting up internal data */ + XML_SetUserData(parser->parser, (void*)obj); + parser->iterator = 0; + parser->defaultCurrent = 0; +#ifdef NEW_EXPAT + parser->lastAttrs = NULL; +#endif + parser->parent = parent; + parser->detectedEncoding = NULL; + + rb_obj_call_init(obj, argc, argv); + + return obj; +} + +static VALUE +XMLParser_initialize(VALUE obj) +{ + return Qnil; +} + +#ifdef HAVE_XML_PARSERRESET +static VALUE +XMLParser_reset(int argc, VALUE* argv, VALUE obj) +{ + XMLParser* parser; + VALUE vencoding = Qnil; + char* encoding = NULL; + int count; + + count = rb_scan_args(argc, argv, "01", &vencoding); + + GET_PARSER(obj, parser); + if (count > 0 && TYPE(vencoding) != T_NIL) { + Check_Type(vencoding, T_STRING); + encoding = RSTRING_PTR(vencoding); + } + XML_ParserReset(parser->parser, encoding); + /* setting up internal data */ + XML_SetUserData(parser->parser, (void*)obj); + parser->iterator = 0; + parser->defaultCurrent = 0; +#ifdef NEW_EXPAT + parser->lastAttrs = NULL; +#endif + parser->tainted = 0; + parser->detectedEncoding = NULL; + + return obj; +} +#endif + +static void +setup_evnet_handlers(XMLParser* parser, VALUE obj) { + XML_StartElementHandler start = NULL; + XML_EndElementHandler end = NULL; +#ifdef NEW_EXPAT + XML_StartCdataSectionHandler startC = NULL; + XML_EndCdataSectionHandler endC = NULL; + XML_StartNamespaceDeclHandler startNS = NULL; + XML_EndNamespaceDeclHandler endNS = NULL; +#endif +#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER + XML_StartDoctypeDeclHandler startDoctype = NULL; + XML_EndDoctypeDeclHandler endDoctype = NULL; +#endif + + /* Call as iterator */ + if (parser->iterator) { + XML_SetElementHandler(parser->parser, + iterStartElementHandler, iterEndElementHandler); + XML_SetCharacterDataHandler(parser->parser, + iterCharacterDataHandler); + XML_SetProcessingInstructionHandler(parser->parser, + iterProcessingInstructionHandler); + /* check dummy default handler */ +#ifdef NEW_EXPAT + if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0)) + XML_SetDefaultHandlerExpand(parser->parser, iterDefaultHandler); + else +#endif + if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0)) + XML_SetDefaultHandler(parser->parser, iterDefaultHandler); + + if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0)) + XML_SetUnparsedEntityDeclHandler(parser->parser, + iterUnparsedEntityDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0)) + XML_SetNotationDeclHandler(parser->parser, + iterNotationDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0)) + XML_SetExternalEntityRefHandler(parser->parser, + iterExternalEntityRefHandler); +#ifdef NEW_EXPAT + if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0)) + XML_SetCommentHandler(parser->parser, iterCommentHandler); + + if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0)) + startC = iterStartCdataSectionHandler; + if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0)) + endC = iterEndCdataSectionHandler; + if (startC || endC) + XML_SetCdataSectionHandler(parser->parser, startC, endC); + + if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0)) + startNS = iterStartNamespaceDeclHandler; + if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0)) + endNS = iterEndNamespaceDeclHandler; + if (startNS || endNS) + XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS); + if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0)) + XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler); +#endif +#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER + if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0)) + startDoctype = iterStartDoctypeDeclHandler; + if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0)) + endDoctype = iterEndDoctypeDeclHandler; + if (startDoctype || endDoctype) + XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype); +#endif +#ifdef HAVE_EXPAT_H + if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0)) + XML_SetElementDeclHandler(parser->parser, iterElementDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0)) + XML_SetAttlistDeclHandler(parser->parser, iterAttlistDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0)) + XML_SetXmlDeclHandler(parser->parser, iterXmlDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0)) + XML_SetEntityDeclHandler(parser->parser, iterEntityDeclHandler); +#endif +#if 0 + if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0)) + XML_SetExternalParsedEntityDeclHandler(parser->parser, + iterExternalParsedEntityDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0)) + XML_SetInternalParsedEntityDeclHandler(parser->parser, + iterInternalParsedEntityDeclHandler); +#endif + /* Call non-iterator version of UnknownEncoding handler, + because the porcedure block often returns the unexpected value. */ + XML_SetUnknownEncodingHandler(parser->parser, + myUnknownEncodingHandler, + (void*)obj); +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER + if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0)) + XML_SetSkippedEntityHandler(parser->parser, iterSkippedEntityHandler); +#endif + } + /* Call as not iterator */ + else { + if (rb_method_boundp(CLASS_OF(obj), id_startElementHandler, 0)) + start = myStartElementHandler; + if (rb_method_boundp(CLASS_OF(obj), id_endElementHandler, 0)) + end = myEndElementHandler; + if (start || end) + XML_SetElementHandler(parser->parser, start, end); + if (rb_method_boundp(CLASS_OF(obj), id_characterDataHandler, 0)) + XML_SetCharacterDataHandler(parser->parser, + myCharacterDataHandler); + if (rb_method_boundp(CLASS_OF(obj), + id_processingInstructionHandler, 0)) + XML_SetProcessingInstructionHandler(parser->parser, + myProcessingInstructionHandler); +#ifdef NEW_EXPAT + if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0)) + XML_SetDefaultHandlerExpand(parser->parser, myDefaultExpandHandler); + else +#endif + if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0)) { + XML_SetDefaultHandler(parser->parser, myDefaultHandler); + } + if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0)) + XML_SetUnparsedEntityDeclHandler(parser->parser, + myUnparsedEntityDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0)) + XML_SetNotationDeclHandler(parser->parser, + myNotationDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0)) + XML_SetExternalEntityRefHandler(parser->parser, + myExternalEntityRefHandler); +#ifdef NEW_EXPAT + if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0)) + XML_SetCommentHandler(parser->parser, myCommentHandler); + + if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0)) + startC = myStartCdataSectionHandler; + if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0)) + endC = myEndCdataSectionHandler; + if (startC || endC) + XML_SetCdataSectionHandler(parser->parser, startC, endC); + + if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0)) + startNS = myStartNamespaceDeclHandler; + if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0)) + endNS = myEndNamespaceDeclHandler; + if (startNS || endNS) + XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS); + if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0)) + XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler); +#endif +#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER + if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0)) + startDoctype = myStartDoctypeDeclHandler; + if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0)) + endDoctype = myEndDoctypeDeclHandler; + if (startDoctype || endDoctype) + XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype); +#endif +#ifdef HAVE_EXPAT_H + if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0)) + XML_SetElementDeclHandler(parser->parser, myElementDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0)) + XML_SetAttlistDeclHandler(parser->parser, myAttlistDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0)) + XML_SetXmlDeclHandler(parser->parser, myXmlDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0)) + XML_SetEntityDeclHandler(parser->parser, myEntityDeclHandler); +#endif +#if 0 + if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0)) + XML_SetExternalParsedEntityDeclHandler(parser->parser, + myExternalParsedEntityDeclHandler); + if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0)) + XML_SetInternalParsedEntityDeclHandler(parser->parser, + myInternalParsedEntityDeclHandler); +#endif + XML_SetUnknownEncodingHandler(parser->parser, + myUnknownEncodingHandler, + (void*)obj); +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER + if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0)) + XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler); +#endif + } +} + + +/* parse method */ +static VALUE +XMLParser_parse(int argc, VALUE* argv, VALUE obj) +{ + XMLParser* parser; + int ret; + VALUE str; + VALUE isFinal; + int final = 1; + int count; + int fromStream = 0; + ID mid = rb_intern("gets"); + ID linebuf = rb_intern("_linebuf"); + + count = rb_scan_args(argc, argv, "02", &str, &isFinal); + /* If "str" has public "gets" method, it will be considered *stream* */ + if (!rb_obj_is_kind_of(str, rb_cString) && + rb_method_boundp(CLASS_OF(str), mid, 1)) { + fromStream = 1; + } + else if (!NIL_P(str)) { + Check_Type(str, T_STRING); + } + if (count >= 2) { + if (isFinal == Qtrue) + final = 1; + else if (isFinal == Qfalse) + final = 0; + else + rb_raise(rb_eTypeError, "not valid value"); + } + + GET_PARSER(obj, parser); + + parser->iterator = rb_block_given_p(); + + /* Setup event handlers */ + setup_evnet_handlers(parser, obj); + + /* Parse from stream (probably slightly slow) */ + if (fromStream) { + VALUE buf; + + if (OBJ_TAINTED(str)) + taintParser(parser); + do { + buf = rb_funcall(str, mid, 0); + if (!NIL_P(buf)) { + Check_Type(buf, T_STRING); + if (OBJ_TAINTED(buf)) + taintParser(parser); + rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/ + ret = XML_Parse(parser->parser, + RSTRING_PTR(buf), RSTRING_LEN(buf), 0); + } + else { + ret = XML_Parse(parser->parser, NULL, 0, 1); + } + if (!ret) { + int err = XML_GetErrorCode(parser->parser); + const char* errStr = XML_ErrorString(err); + rb_raise(eXMLParserError, (char*)errStr); + } + } while (!NIL_P(buf)); + return Qnil; + } + + /* Parse string */ + if (!NIL_P(str)) { +#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET) + int err; +#endif + if (OBJ_TAINTED(str)) + taintParser(parser); + ret = XML_Parse(parser->parser, + RSTRING_PTR(str), RSTRING_LEN(str), final); +#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET) + /* Ruby 1.9.1 Encoding conversion */ + err = XML_GetErrorCode(parser->parser); + if (final && err == XML_ERROR_UNKNOWN_ENCODING) { + rb_encoding* enc; + volatile VALUE encobj; + volatile VALUE ustr; + enc = rb_enc_find(parser->detectedEncoding); + if ((int)ENC_TO_ENCINDEX(enc) != rb_ascii8bit_encindex()) { + rb_enc_associate(str, enc); + encobj = rb_enc_from_encoding(enc_xml); + /* rb_str_encode may raises an exception */ + ustr = rb_str_encode(str, encobj, 0, Qnil); + if (!NIL_P(ustr)) { + XML_ParserReset(parser->parser, "utf-8"); + XML_SetUserData(parser->parser, (void*)obj); + parser->defaultCurrent = 0; +#ifdef NEW_EXPAT + parser->lastAttrs = NULL; +#endif + parser->detectedEncoding = NULL; + setup_evnet_handlers(parser, obj); + ret = XML_Parse(parser->parser, + RSTRING_PTR(ustr), RSTRING_LEN(ustr), final); + } + } + } +#endif + } + else + ret = XML_Parse(parser->parser, NULL, 0, final); + if (!ret) { + int err = XML_GetErrorCode(parser->parser); + const char* errStr = XML_ErrorString(err); + rb_raise(eXMLParserError, (char*)errStr); + } + + return Qnil; +} + +/* done method */ +static VALUE +XMLParser_done(VALUE obj) +{ + XMLParser* parser; + + GET_PARSER(obj, parser); + if (parser->parser) { + XML_ParserFree(parser->parser); + parser->parser = NULL; + } + return Qnil; +} + +/* defaultCurrent method */ +static VALUE +XMLParser_defaultCurrent(VALUE obj) +{ + XMLParser* parser; + + GET_PARSER(obj, parser); + if (!(parser->iterator)) { + XML_DefaultCurrent(parser->parser); + } + else { + parser->defaultCurrent = 1; + } + return Qnil; +} + +/* line method */ +static VALUE +XMLParser_getCurrentLineNumber(VALUE obj) +{ + XMLParser* parser; + int line; + + GET_PARSER(obj, parser); + line = XML_GetCurrentLineNumber(parser->parser); + + return INT2FIX(line); +} + +/* column method */ +static VALUE +XMLParser_getCurrentColumnNumber(VALUE obj) +{ + XMLParser* parser; + int column; + + GET_PARSER(obj, parser); + column = XML_GetCurrentColumnNumber(parser->parser); + + return INT2FIX(column); +} + +/* byte index method */ +static VALUE +XMLParser_getCurrentByteIndex(VALUE obj) +{ + XMLParser* parser; + long pos; + + GET_PARSER(obj, parser); + pos = XML_GetCurrentByteIndex(parser->parser); + + return INT2FIX(pos); +} + +/* set URI base */ +static VALUE +XMLParser_setBase(VALUE obj, VALUE base) +{ + XMLParser* parser; + int ret; + + Check_Type(base, T_STRING); + GET_PARSER(obj, parser); + if (OBJ_TAINTED(base)) + taintParser(parser); + ret = XML_SetBase(parser->parser, RSTRING_PTR(base)); + + return INT2FIX(ret); +} + +/* get URI base */ +static VALUE +XMLParser_getBase(VALUE obj) +{ + XMLParser* parser; + const XML_Char* ret; + + GET_PARSER(obj, parser); + ret = XML_GetBase(parser->parser); + if (!ret) + return Qnil; + + return TO_(ENC_(rb_str_new2((char*)ret))); +} + +#ifdef NEW_EXPAT +#if 0 +static VALUE +XMLParser_getSpecifiedAttributes(VALUE obj) +{ + XMLParser* parser; + int count; + const XML_Char** atts; + VALUE attrhash; + + GET_PARSER(obj, parser); + atts = parser->lastAttrs; + if (!atts) + return Qnil; + count = XML_GetSpecifiedAttributeCount(parser->parser)/2; + attrhash = rb_hash_new(); + while (*atts) { + const char* key = *atts++; + atts++; + rb_hash_aset(attrhash, FO_(TO_(ENC_(rb_str_new2((char*)key)))), + (count-- > 0) ? Qtrue: Qfalse); + } + + return attrhash; +} +#else +static VALUE +XMLParser_getSpecifiedAttributes(VALUE obj) +{ + XMLParser* parser; + int i, count; + const XML_Char** atts; + VALUE attrarray; + + GET_PARSER(obj, parser); + atts = parser->lastAttrs; + if (!atts) + return Qnil; + count = XML_GetSpecifiedAttributeCount(parser->parser)/2; + attrarray = rb_ary_new2(count); + for (i = 0; i < count; i++, atts+=2) { + const char* key = *atts; + rb_ary_push(attrarray, TO_(ENC_(rb_str_new2((char*)key)))); + } + + return attrarray; +} +#endif + +static VALUE +XMLParser_getCurrentByteCount(VALUE obj) +{ + XMLParser* parser; + + GET_PARSER(obj, parser); + return INT2FIX(XML_GetCurrentByteCount(parser->parser)); +} +#endif + +#ifdef XML_DTD +static VALUE +XMLParser_setParamEntityParsing(VALUE obj, VALUE parsing) +{ + XMLParser* parser; + int ret; + + Check_Type(parsing, T_FIXNUM); + GET_PARSER(obj, parser); + ret = XML_SetParamEntityParsing(parser->parser, FIX2INT(parsing)); + + return INT2FIX(ret); +} +#endif + +static VALUE +XMLParser_s_expatVersion(VALUE obj) +{ +#if defined(HAVE_EXPAT_H) + return ENC_(rb_str_new2(XML_ExpatVersion())); +#elif defined(EXPAT_1_2) + return ENC_(rb_str_new2("1.2")); +#elif defined(NEW_EXPAT) + return ENC_(rb_str_new2("1.1")); +#else + return ENC_(rb_str_new2("1.0")); +#endif +} + +#ifdef HAVE_EXPAT_H +static VALUE +XMLParser_setReturnNSTriplet(VALUE obj, VALUE do_nst) +{ + XMLParser* parser; + int nst; + + GET_PARSER(obj, parser); + switch (TYPE(do_nst)) { + case T_TRUE: + nst = 1; + break; + case T_FALSE: + nst = 0; + break; + case T_FIXNUM: + nst = FIX2INT(do_nst); + break; + default: + rb_raise(rb_eTypeError, "not valid value"); + } + XML_SetReturnNSTriplet(parser->parser, nst); + + return Qnil; +} + + +static VALUE +XMLParser_getInputContext(VALUE obj) +{ + XMLParser* parser; + const char* buffer; + int offset; + int size; + VALUE ret = Qnil; + + GET_PARSER(obj, parser); + buffer = XML_GetInputContext(parser->parser, + &offset, + &size); + if (buffer && size > 0) { + ret = rb_ary_new3(2, + TO_(ENC_(rb_str_new(buffer, size))), + INT2FIX(offset)); + } + + return ret; +} + + +static VALUE +XMLParser_getIdAttrribute(VALUE obj) +{ + XMLParser* parser; + int idattr; + const XML_Char** atts; + + GET_PARSER(obj, parser); + atts = parser->lastAttrs; + if (!atts) + return Qnil; + idattr = XML_GetIdAttributeIndex(parser->parser); + if (idattr < 0) + return Qnil; + return TO_(ENC_(rb_str_new2((char*)atts[idattr]))); +} +#endif + +#ifdef HAVE_XML_USEFOREIGNDTD +static VALUE +XMLParser_useForeignDTD(VALUE obj, VALUE useDTD) +{ + XMLParser* parser; + int dtd; + int ret; + + GET_PARSER(obj, parser); + switch (TYPE(useDTD)) { + case T_TRUE: + dtd = 1; + break; + case T_FALSE: + dtd = 0; + break; + case T_FIXNUM: + dtd = FIX2INT(useDTD); + break; + default: + rb_raise(rb_eTypeError, "not valid value"); + } + ret = XML_UseForeignDTD(parser->parser, dtd); + + return INT2FIX(ret); +} +#endif + +#ifdef HAVE_XML_GETFEATURELIST +static VALUE +XMLParser_s_getFeatureList(VALUE obj) +{ + const XML_Feature* list; + VALUE ret = rb_hash_new(); + + list = XML_GetFeatureList(); + while (list && list->feature) { + rb_hash_aset(ret, FO_(ENC_(rb_str_new2(list->name))), INT2NUM(list->value)); + list++; + } + + return ret; +} +#endif + +void +Init_xmlparser() +{ + VALUE mXML; + +#ifdef HAVE_RUBY_ENCODING_H + enc_xml = rb_utf8_encoding(); +#endif + + eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError); + cXMLParser = rb_define_class("XMLParser", rb_cObject); + cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject); + + /* Class name aliases */ + if (rb_const_defined(rb_cObject, rb_intern("XML")) == Qtrue) + mXML = rb_const_get(rb_cObject, rb_intern("XML")); + else + mXML = rb_define_module("XML"); + rb_define_const(mXML, "ParserError", eXMLParserError); + rb_define_const(cXMLParser, "Error", eXMLParserError); + rb_define_const(mXML, "Parser", cXMLParser); + rb_define_const(mXML, "Encoding", cXMLEncoding); + + rb_define_singleton_method(cXMLParser, "new", XMLParser_new, -1); + rb_define_singleton_method(cXMLParser, "expatVersion", + XMLParser_s_expatVersion, 0); + rb_define_method(cXMLParser, "initialize", XMLParser_initialize, -1); + rb_define_method(cXMLParser, "parse", XMLParser_parse, -1); + rb_define_method(cXMLParser, "done", XMLParser_done, 0); + rb_define_method(cXMLParser, "defaultCurrent", XMLParser_defaultCurrent, 0); + rb_define_method(cXMLParser, "line", XMLParser_getCurrentLineNumber, 0); + rb_define_method(cXMLParser, "column", XMLParser_getCurrentColumnNumber, 0); + rb_define_method(cXMLParser, "byteIndex", XMLParser_getCurrentByteIndex, 0); + rb_define_method(cXMLParser, "setBase", XMLParser_setBase, 1); + rb_define_method(cXMLParser, "getBase", XMLParser_getBase, 0); +#ifdef NEW_EXPAT + rb_define_method(cXMLParser, "getSpecifiedAttributes", + XMLParser_getSpecifiedAttributes, 0); + rb_define_method(cXMLParser, "byteCount", XMLParser_getCurrentByteCount, 0); +#endif +#ifdef XML_DTD + rb_define_method(cXMLParser, "setParamEntityParsing", + XMLParser_setParamEntityParsing, 1); +#endif +#ifdef HAVE_EXPAT_H + rb_define_method(cXMLParser, "setReturnNSTriplet", + XMLParser_setReturnNSTriplet, 1); + rb_define_method(cXMLParser, "getInputContext", + XMLParser_getInputContext, 0); + rb_define_method(cXMLParser, "getIdAttribute", + XMLParser_getIdAttrribute, 0); +#endif + +#ifdef HAVE_XML_PARSERRESET + rb_define_method(cXMLParser, "reset", XMLParser_reset, -1); +#endif + + rb_define_method(cXMLEncoding, "map", XMLEncoding_map, 1); + rb_define_method(cXMLEncoding, "convert", XMLEncoding_convert, 1); + +#ifdef HAVE_XML_USEFOREIGNDTD + rb_define_method(cXMLParser, "useForeignDTD", + XMLParser_useForeignDTD, 1); +#endif +#ifdef HAVE_XML_GETFEATURELIST + rb_define_singleton_method(cXMLParser, "getFeatureList", + XMLParser_s_getFeatureList, 0); +#endif + +#define DEFINE_EVENT_CODE(klass, name) \ + rb_define_const(klass, #name, sym##name = ID2SYM(rb_intern(#name))) + + DEFINE_EVENT_CODE(cXMLParser, START_ELEM); + DEFINE_EVENT_CODE(cXMLParser, END_ELEM); + DEFINE_EVENT_CODE(cXMLParser, CDATA); + DEFINE_EVENT_CODE(cXMLParser, PI); + DEFINE_EVENT_CODE(cXMLParser, DEFAULT); + DEFINE_EVENT_CODE(cXMLParser, UNPARSED_ENTITY_DECL); + DEFINE_EVENT_CODE(cXMLParser, NOTATION_DECL); + DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_ENTITY_REF); +#ifdef NEW_EXPAT + DEFINE_EVENT_CODE(cXMLParser, COMMENT); + DEFINE_EVENT_CODE(cXMLParser, START_CDATA); + DEFINE_EVENT_CODE(cXMLParser, END_CDATA); + DEFINE_EVENT_CODE(cXMLParser, START_NAMESPACE_DECL); + DEFINE_EVENT_CODE(cXMLParser, END_NAMESPACE_DECL); +#endif +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER + DEFINE_EVENT_CODE(cXMLParser, SKIPPED_ENTITY); +#endif +#ifdef XML_DTD + rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_NEVER", + XML_PARAM_ENTITY_PARSING_NEVER); + rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_UNLESS_STANDALONE", + XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); + rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_ALWAYS", + XML_PARAM_ENTITY_PARSING_ALWAYS); +#endif +#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER + DEFINE_EVENT_CODE(cXMLParser, START_DOCTYPE_DECL); + DEFINE_EVENT_CODE(cXMLParser, END_DOCTYPE_DECL); +#endif +#ifdef HAVE_EXPAT_H + DEFINE_EVENT_CODE(cXMLParser, ELEMENT_DECL); + DEFINE_EVENT_CODE(cXMLParser, ATTLIST_DECL); + DEFINE_EVENT_CODE(cXMLParser, XML_DECL); + DEFINE_EVENT_CODE(cXMLParser, ENTITY_DECL); +#endif +#if 0 + DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_PARSED_ENTITY_DECL); + DEFINE_EVENT_CODE(cXMLParser, INTERNAL_PARSED_ENTITY_DECL); +#endif +#if 0 + DEFINE_EVENT_CODE(cXMLParser, UNKNOWN_ENCODING); +#endif + + id_map = rb_intern("_map"); + id_startElementHandler = rb_intern("startElement"); + id_endElementHandler = rb_intern("endElement"); + id_characterDataHandler = rb_intern("character"); + id_processingInstructionHandler = rb_intern("processingInstruction"); + id_defaultHandler = rb_intern("default"); + id_unparsedEntityDeclHandler = rb_intern("unparsedEntityDecl"); + id_notationDeclHandler = rb_intern("notationDecl"); + id_externalEntityRefHandler = rb_intern("externalEntityRef"); +#ifdef NEW_EXPAT + id_defaultExpandHandler = rb_intern("defaultExpand"); + id_commentHandler = rb_intern("comment"); + id_startCdataSectionHandler = rb_intern("startCdata"); + id_endCdataSectionHandler = rb_intern("endCdata"); + id_startNamespaceDeclHandler = rb_intern("startNamespaceDecl"); + id_endNamespaceDeclHandler = rb_intern("endNamespaceDecl"); + id_notStandaloneHandler = rb_intern("notStandalone"); +#endif +#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER + id_startDoctypeDeclHandler = rb_intern("startDoctypeDecl"); + id_endDoctypeDeclHandler = rb_intern("endDoctypeDecl"); +#endif + id_unknownEncoding = rb_intern("unknownEncoding"); + id_convert = rb_intern("convert"); +#ifdef HAVE_EXPAT_H + id_elementDeclHandler = rb_intern("elementDecl"); + id_attlistDeclHandler = rb_intern("attlistDecl"); + id_xmlDeclHandler = rb_intern("xmlDecl"); + id_entityDeclHandler = rb_intern("entityDecl"); +#endif +#if 0 + id_externalParsedEntityDeclHandler = rb_intern("externalParsedEntityDecl"); + id_internalParsedEntityDeclHandler = rb_intern("internalParsedEntityDecl"); +#endif +#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER + id_skippedEntityHandler = rb_intern("skippedEntity"); +#endif +} debian/patches/xmlparser-ftbfs-fix.patch0000644000000000000000000000126512262421624015546 0ustar --- ruby-xmlparser-0.7.2.orig/ext/xmlparser.c +++ ruby-xmlparser-0.7.2/ext/xmlparser.c @@ -1780,7 +1780,7 @@ XMLParser_parse(int argc, VALUE* argv, V if (!ret) { int err = XML_GetErrorCode(parser->parser); const char* errStr = XML_ErrorString(err); - rb_raise(eXMLParserError, (char*)errStr); + rb_raise(eXMLParserError, "%s", errStr); } } while (!NIL_P(buf)); return Qnil; @@ -1829,7 +1829,7 @@ XMLParser_parse(int argc, VALUE* argv, V if (!ret) { int err = XML_GetErrorCode(parser->parser); const char* errStr = XML_ErrorString(err); - rb_raise(eXMLParserError, (char*)errStr); + rb_raise(eXMLParserError, "%s", errStr); } return Qnil; debian/patches/replaces_gt_symbol_by_enconded_version.patch0000644000000000000000000000163712262421624021621 0ustar Index: xmlparser/lib/xml/dom/core.rb =================================================================== --- xmlparser.orig/lib/xml/dom/core.rb 2010-05-12 13:36:36.000000000 -0400 +++ xmlparser/lib/xml/dom/core.rb 2010-05-12 13:38:46.000000000 -0400 @@ -1846,6 +1846,8 @@ value << """ when ?< value << "<" + when ?> + value << ">" else value << code end Index: xmlparser/lib/xml/dom2/attr.rb =================================================================== --- xmlparser.orig/lib/xml/dom2/attr.rb 2010-05-12 13:39:12.000000000 -0400 +++ xmlparser/lib/xml/dom2/attr.rb 2010-05-12 13:40:15.000000000 -0400 @@ -122,7 +122,9 @@ value << """ elsif c == "<" value << "<" - else + elsif c == ">" + value << ">" + else value << c end end debian/patches/series0000644000000000000000000000013712262421624012034 0ustar replaces_gt_symbol_by_enconded_version.patch move-files-to-ext.patch xmlparser-ftbfs-fix.patch