ox-1.8.9/0000755000004100000410000000000012111637206012206 5ustar www-datawww-dataox-1.8.9/ext/0000755000004100000410000000000012111637206013006 5ustar www-datawww-dataox-1.8.9/ext/ox/0000755000004100000410000000000012111637206013434 5ustar www-datawww-dataox-1.8.9/ext/ox/extconf.rb0000644000004100000410000000502012111637206015424 0ustar www-datawww-datarequire 'mkmf' extension_name = 'ox' dir_config(extension_name) parts = RUBY_DESCRIPTION.split(' ') type = parts[0] type = 'ree' if 'ruby' == type && RUBY_DESCRIPTION.include?('Ruby Enterprise Edition') platform = RUBY_PLATFORM version = RUBY_VERSION.split('.') puts ">>>>> Creating Makefile for #{type} version #{RUBY_VERSION} on #{platform} <<<<<" dflags = { 'RUBY_TYPE' => type, (type.upcase + '_RUBY') => nil, 'RUBY_VERSION' => RUBY_VERSION, 'RUBY_VERSION_MAJOR' => version[0], 'RUBY_VERSION_MINOR' => version[1], 'RUBY_VERSION_MICRO' => version[2], 'HAS_RB_TIME_TIMESPEC' => ('ruby' == type && ('1.9.3' == RUBY_VERSION)) ? 1 : 0, #'HAS_RB_TIME_TIMESPEC' => ('ruby' == type && ('1.9.3' == RUBY_VERSION || '2' <= version[0])) ? 1 : 0, 'HAS_TM_GMTOFF' => ('ruby' == type && (('1' == version[0] && '9' == version[1]) || '2' <= version[0]) && !(platform.include?('solaris') || platform.include?('linux') || RUBY_PLATFORM =~ /(win|w)32$/)) ? 1 : 0, 'HAS_ENCODING_SUPPORT' => (('ruby' == type || 'rubinius' == type) && (('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0, 'HAS_PRIVATE_ENCODING' => ('jruby' == type && '1' == version[0] && '9' == version[1]) ? 1 : 0, 'HAS_NANO_TIME' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0, 'HAS_RSTRUCT' => ('ruby' == type || 'ree' == type) ? 1 : 0, 'HAS_IVAR_HELPERS' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0, 'HAS_PROC_WITH_BLOCK' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 
1 : 0, 'HAS_TOP_LEVEL_ST_H' => ('ree' == type || ('ruby' == type && '1' == version[0] && '8' == version[1])) ? 1 : 0, 'NEEDS_UIO' => (RUBY_PLATFORM =~ /(win|w)32$/) ? 0 : 1, } if RUBY_PLATFORM =~ /(win|w)32$/ || RUBY_PLATFORM =~ /solaris2\.10/ dflags['NEEDS_STPCPY'] = nil end if ['i386-darwin10.0.0', 'x86_64-darwin10.8.0'].include? RUBY_PLATFORM dflags['NEEDS_STPCPY'] = nil dflags['HAS_IVAR_HELPERS'] = 0 if ('ruby' == type && '1.9.1' == RUBY_VERSION) elsif 'x86_64-linux' == RUBY_PLATFORM && '1.9.3' == RUBY_VERSION && '2011-10-30' == RUBY_RELEASE_DATE begin dflags['NEEDS_STPCPY'] = nil if `more /etc/issue`.include?('CentOS release 5.4') rescue Exception => e end end dflags.each do |k,v| if v.nil? $CPPFLAGS += " -D#{k}" else $CPPFLAGS += " -D#{k}=#{v}" end end $CPPFLAGS += ' -Wall' #puts "*** $CPPFLAGS: #{$CPPFLAGS}" create_makefile(extension_name) %x{make clean} ox-1.8.9/ext/ox/cache.c0000644000004100000410000001405712111637206014652 0ustar www-datawww-data/* cache.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "cache.h" struct _Cache { /* The key is a length byte followed by the key as a string. If the key is longer than 254 characters then the length is 255. The key can be for a premature value and in that case the length byte is greater than the length of the key. */ char *key; VALUE value; struct _Cache *slots[16]; }; static void slot_print(Cache cache, unsigned int depth); static char* form_key(const char *s) { size_t len = strlen(s); char *d = ALLOC_N(char, len); *d = (255 <= len) ? 
255 : len; memcpy(d + 1, s, len + 1); return d; } void ox_cache_new(Cache *cache) { *cache = ALLOC(struct _Cache); (*cache)->key = 0; (*cache)->value = Qundef; bzero((*cache)->slots, sizeof((*cache)->slots)); } VALUE ox_cache_get(Cache cache, const char *key, VALUE **slot, char **keyp) { unsigned char *k = (unsigned char*)key; Cache *cp; for (; '\0' != *k; k++) { cp = cache->slots + (unsigned int)(*k >> 4); /* upper 4 bits */ if (0 == *cp) { ox_cache_new(cp); } cache = *cp; cp = cache->slots + (unsigned int)(*k & 0x0F); /* lower 4 bits */ if (0 == *cp) { /* nothing on this tree so set key and value as a premature key/value pair */ ox_cache_new(cp); cache = *cp; cache->key = form_key(key); break; } else { int depth = (int)(k - (unsigned char*)key + 1); cache = *cp; if ('\0' == *(k + 1)) { /* exact match */ if (0 == cache->key) { /* nothing in this spot so take it */ cache->key = form_key(key); break; } else if ((depth == *cache->key || 255 < depth) && 0 == strcmp(key, cache->key + 1)) { /* match */ break; } else { /* have to move the current premature key/value deeper */ unsigned char *ck = (unsigned char*)(cache->key + depth + 1); Cache orig = *cp; cp = (*cp)->slots + (*ck >> 4); ox_cache_new(cp); cp = (*cp)->slots + (*ck & 0x0F); ox_cache_new(cp); (*cp)->key = cache->key; (*cp)->value = cache->value; orig->key = form_key(key); orig->value = Qundef; } } else { /* not exact match but on the path */ if (0 != cache->key) { /* there is a key/value here already */ if (depth == *cache->key || (255 <= depth && 0 == strncmp(cache->key, key, depth) && '\0' == cache->key[depth])) { /* key belongs here */ continue; } else { unsigned char *ck = (unsigned char*)(cache->key + depth + 1); Cache orig = *cp; cp = (*cp)->slots + (*ck >> 4); ox_cache_new(cp); cp = (*cp)->slots + (*ck & 0x0F); ox_cache_new(cp); (*cp)->key = cache->key; (*cp)->value = cache->value; orig->key = 0; orig->value = Qundef; } } } } } *slot = &cache->value; if (0 != keyp) { if (0 == cache->key) { // TBD bug 
somewhere printf("*** Error: failed to set the key for %s\n", key); *keyp = 0; } else { *keyp = cache->key + 1; } } return cache->value; } void ox_cache_print(Cache cache) { /*printf("-------------------------------------------\n");*/ slot_print(cache, 0); } static void slot_print(Cache c, unsigned int depth) { char indent[256]; Cache *cp; unsigned int i; if (sizeof(indent) - 1 < depth) { depth = ((int)sizeof(indent) - 1); } memset(indent, ' ', depth); indent[depth] = '\0'; for (i = 0, cp = c->slots; i < 16; i++, cp++) { if (0 == *cp) { /*printf("%s%02u:\n", indent, i);*/ } else { if (0 == (*cp)->key && Qundef == (*cp)->value) { printf("%s%02u:\n", indent, i); } else { const char *vs; const char *clas; if (Qundef == (*cp)->value) { vs = "undefined"; clas = ""; } else { VALUE rs = rb_funcall2((*cp)->value, rb_intern("to_s"), 0, 0); vs = StringValuePtr(rs); clas = rb_class2name(rb_obj_class((*cp)->value)); } printf("%s%02u: %s = %s (%s)\n", indent, i, (*cp)->key, vs, clas); } slot_print(*cp, depth + 2); } } } ox-1.8.9/ext/ox/parse.c0000644000004100000410000005610312111637206014717 0ustar www-datawww-data/* parse.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include "ruby.h" #include "ox.h" static void read_instruction(PInfo pi); static void read_doctype(PInfo pi); static void read_comment(PInfo pi); static void read_element(PInfo pi); static void read_text(PInfo pi); /*static void read_reduced_text(PInfo pi); */ static void read_cdata(PInfo pi); static char* read_name_token(PInfo pi); static char* read_quoted_value(PInfo pi); static char* read_hex_uint64(char *b, uint64_t *up); static char* read_10_uint64(char *b, uint64_t *up); static char* ucs_to_utf8_chars(char *text, uint64_t u); static char* read_coded_chars(PInfo pi, char *text); static void next_non_white(PInfo pi); static int collapse_special(PInfo pi, char *str); /* This XML parser is a single pass, destructive, callback parser. It is a * single pass parse since it only make one pass over the characters in the * XML document string. It is destructive because it re-uses the content of * the string for values in the callback and places \0 characters at various * places to mark the end of tokens and strings. It is a callback parser like * a SAX parser because it uses callback when document elements are * encountered. * * Parsing is very tolerant. 
Lack of headers and even mispelled element * endings are passed over without raising an error. A best attempt is made in * all cases to parse the string. */ inline static void next_non_white(PInfo pi) { for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '\n': case '\r': break; default: return; } } } inline static void next_white(PInfo pi) { for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '\n': case '\r': case '\0': return; default: break; } } } VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) { struct _PInfo pi; int body_read = 0; if (0 == xml) { raise_error("Invalid arg, xml string can not be null", xml, 0); } if (DEBUG <= options->trace) { printf("Parsing xml:\n%s\n", xml); } /* initialize parse info */ pi.str = xml; pi.s = xml; pi.h = 0; pi.pcb = pcb; pi.obj = Qnil; pi.circ_array = 0; pi.options = options; while (1) { next_non_white(&pi); /* skip white space */ if ('\0' == *pi.s) { break; } if (body_read && 0 != endp) { *endp = pi.s; break; } if ('<' != *pi.s) { /* all top level entities start with < */ raise_error("invalid format, expected <", pi.str, pi.s); } pi.s++; /* past < */ switch (*pi.s) { case '?': /* processing instruction */ pi.s++; read_instruction(&pi); break; case '!': /* comment or doctype */ pi.s++; if ('\0' == *pi.s) { raise_error("invalid format, DOCTYPE or comment not terminated", pi.str, pi.s); } else if ('-' == *pi.s) { pi.s++; /* skip - */ if ('-' != *pi.s) { raise_error("invalid format, bad comment format", pi.str, pi.s); } else { pi.s++; /* skip second - */ read_comment(&pi); } } else if (0 == strncmp("DOCTYPE", pi.s, 7)) { pi.s += 7; read_doctype(&pi); } else { raise_error("invalid format, DOCTYPE or comment expected", pi.str, pi.s); } break; case '\0': raise_error("invalid format, document not terminated", pi.str, pi.s); default: read_element(&pi); body_read = 1; break; } } return pi.obj; } static char* gather_content(const char *src, char *content, size_t 
len) { for (; 0 < len; src++, content++, len--) { switch (*src) { case '?': if ('>' == *(src + 1)) { *content = '\0'; return (char*)(src + 1); } *content = *src; break; case '\0': return 0; default: *content = *src; break; } } return 0; } /* Entered after the "s; if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) { raise_error("processing instruction content too large or not terminated", pi->str, pi->s); } next_non_white(pi); c = *pi->s; *end = '\0'; /* terminate name */ if ('?' != c) { while ('?' != *pi->s) { if ('\0' == *pi->s) { raise_error("invalid format, processing instruction not terminated", pi->str, pi->s); } next_non_white(pi); a->name = read_name_token(pi); end = pi->s; next_non_white(pi); if ('=' != *pi->s++) { attrs_ok = 0; break; } *end = '\0'; /* terminate name */ /* read value */ next_non_white(pi); a->value = read_quoted_value(pi); a++; if (MAX_ATTRS <= (a - attrs)) { attrs_ok = 0; break; } next_non_white(pi); } if ('?' == *pi->s) { pi->s++; } } else { pi->s++; } if (attrs_ok) { if ('>' != *pi->s++) { raise_error("invalid format, processing instruction not terminated", pi->str, pi->s); } } else { pi->s = cend + 1; } if (0 != pi->pcb->instruct) { if (attrs_ok) { pi->pcb->instruct(pi, target, attrs, 0); } else { pi->pcb->instruct(pi, target, attrs, content); } } } /* Entered after the "s; while (1) { c = *pi->s++; if ('\0' == c) { raise_error("invalid format, prolog not terminated", pi->str, pi->s); } else if ('<' == c) { depth++; } else if ('>' == c) { depth--; if (0 == depth) { /* done, at the end */ pi->s--; break; } } } *pi->s = '\0'; pi->s++; if (0 != pi->pcb->add_doctype) { pi->pcb->add_doctype(pi, docType); } } /* Entered after ""); if (0 == end) { raise_error("invalid format, comment not terminated", pi->str, pi->s); } for (s = end - 1; pi->s < s && !done; s--) { switch(*s) { case ' ': case '\t': case '\f': case '\n': case '\r': break; default: *(s + 1) = '\0'; done = 1; break; } } *end = '\0'; /* in case the comment was 
blank */ pi->s = end + 3; if (0 != pi->pcb->add_comment) { pi->pcb->add_comment(pi, comment); } } /* Entered after the '<' and the first character after that. Returns status * code. */ static void read_element(PInfo pi) { struct _Attr attrs[MAX_ATTRS]; Attr ap = attrs; char *name; char *ename; char *end; char c; long elen; int hasChildren = 0; int done = 0; ename = read_name_token(pi); end = pi->s; elen = end - ename; next_non_white(pi); c = *pi->s; *end = '\0'; if ('/' == c) { /* empty element, no attributes and no children */ pi->s++; if ('>' != *pi->s) { /*printf("*** '%s' ***\n", pi->s); */ raise_error("invalid format, element not closed", pi->str, pi->s); } pi->s++; /* past > */ ap->name = 0; pi->pcb->add_element(pi, ename, attrs, hasChildren); pi->pcb->end_element(pi, ename); return; } /* read attribute names until the close (/ or >) is reached */ while (!done) { if ('\0' == c) { next_non_white(pi); c = *pi->s; } switch (c) { case '\0': raise_error("invalid format, document not terminated", pi->str, pi->s); case '/': /* Element with just attributes. */ pi->s++; if ('>' != *pi->s) { raise_error("invalid format, element not closed", pi->str, pi->s); } pi->s++; ap->name = 0; pi->pcb->add_element(pi, ename, attrs, hasChildren); pi->pcb->end_element(pi, ename); return; case '>': /* has either children or a value */ pi->s++; hasChildren = 1; done = 1; ap->name = 0; pi->pcb->add_element(pi, ename, attrs, hasChildren); break; default: /* Attribute name so it's an element and the attribute will be */ /* added to it. 
*/ ap->name = read_name_token(pi); end = pi->s; next_non_white(pi); if ('=' != *pi->s++) { raise_error("invalid format, no attribute value", pi->str, pi->s); } *end = '\0'; /* terminate name */ /* read value */ next_non_white(pi); ap->value = read_quoted_value(pi); if (0 != strchr(ap->value, '&')) { if (0 != collapse_special(pi, (char*)ap->value)) { raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s); } } ap++; if (MAX_ATTRS <= (ap - attrs)) { raise_error("too many attributes", pi->str, pi->s); } break; } c = '\0'; } if (hasChildren) { char *start; int first = 1; done = 0; /* read children */ while (!done) { start = pi->s; next_non_white(pi); c = *pi->s++; if ('\0' == c) { raise_error("invalid format, document not terminated", pi->str, pi->s); } if ('<' == c) { char *slash; switch (*pi->s) { case '!': /* better be a comment or CDATA */ pi->s++; if ('-' == *pi->s && '-' == *(pi->s + 1)) { pi->s += 2; read_comment(pi); } else if (0 == strncmp("[CDATA[", pi->s, 7)) { pi->s += 7; read_cdata(pi); } else { raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s); } break; case '?': /* processing instruction */ pi->s++; read_instruction(pi); break; case '/': slash = pi->s; pi->s++; name = read_name_token(pi); end = pi->s; next_non_white(pi); c = *pi->s; *end = '\0'; if (0 != strcmp(name, ename)) { raise_error("invalid format, elements overlap", pi->str, pi->s); } if ('>' != c) { raise_error("invalid format, element not closed", pi->str, pi->s); } if (first && start != slash - 1) { /* some white space between start and here so add as text */ *(slash - 1) = '\0'; pi->pcb->add_text(pi, start, 1); } pi->s++; pi->pcb->end_element(pi, ename); return; case '\0': raise_error("invalid format, document not terminated", pi->str, pi->s); default: first = 0; /* a child element */ read_element(pi); break; } } else { /* read as TEXT */ pi->s = start; /*pi->s--; */ read_text(pi); /*read_reduced_text(pi); */ /* to exit 
read_text with no errors the next character must be < */ if ('/' == *(pi->s + 1) && 0 == strncmp(ename, pi->s + 2, elen) && '>' == *(pi->s + elen + 2)) { /* close tag after text so treat as a value */ pi->s += elen + 3; pi->pcb->end_element(pi, ename); return; } } } } } static void read_text(PInfo pi) { char buf[MAX_TEXT_LEN]; char *b = buf; char *alloc_buf = 0; char *end = b + sizeof(buf) - 2; char c; int done = 0; while (!done) { c = *pi->s++; switch(c) { case '<': done = 1; pi->s--; break; case '\0': raise_error("invalid format, document not terminated", pi->str, pi->s); default: if (end <= (b + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */ unsigned long size; if (0 == alloc_buf) { size = sizeof(buf) * 2; alloc_buf = ALLOC_N(char, size); memcpy(alloc_buf, buf, b - buf); b = alloc_buf + (b - buf); } else { unsigned long pos = b - alloc_buf; size = (end - alloc_buf) * 2; REALLOC_N(alloc_buf, char, size); b = alloc_buf + pos; } end = alloc_buf + size - 2; } if ('&' == c) { b = read_coded_chars(pi, b); } else { *b++ = c; } break; } } *b = '\0'; if (0 != alloc_buf) { pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1))); xfree(alloc_buf); } else { pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1))); } } #if 0 static void read_reduced_text(PInfo pi) { char buf[MAX_TEXT_LEN]; char *b = buf; char *alloc_buf = 0; char *end = b + sizeof(buf) - 2; char c; int spc = 0; int done = 0; while (!done) { c = *pi->s++; switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': spc = 1; break; case '<': done = 1; pi->s--; break; case '\0': raise_error("invalid format, document not terminated", pi->str, pi->s); default: if (end <= (b + spc + (('&' == c) ? 
7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */ unsigned long size; if (0 == alloc_buf) { size = sizeof(buf) * 2; alloc_buf = ALLOC_N(char, size); memcpy(alloc_buf, buf, b - buf); b = alloc_buf + (b - buf); } else { unsigned long pos = b - alloc_buf; size = (end - alloc_buf) * 2; REALLOC(alloc_buf, char, size); b = alloc_buf + pos; } end = alloc_buf + size - 2; } if (spc) { *b++ = ' '; } spc = 0; if ('&' == c) { b = read_coded_chars(pi, b); } else { *b++ = c; } break; } } *b = '\0'; if (0 != alloc_buf) { pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1))); xfree(alloc_buf); } else { pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1))); } } #endif static char* read_name_token(PInfo pi) { char *start; next_non_white(pi); start = pi->s; for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '?': case '=': case '/': case '>': case '\n': case '\r': return start; case '\0': /* documents never terminate after a name token */ raise_error("invalid format, document not terminated", pi->str, pi->s); break; /* to avoid warnings */ default: break; } } return start; } static void read_cdata(PInfo pi) { char *start; char *end; start = pi->s; end = strstr(pi->s, "]]>"); if (end == 0) { raise_error("invalid format, CDATA not terminated", pi->str, pi->s); } *end = '\0'; pi->s = end + 3; if (0 != pi->pcb->add_cdata) { pi->pcb->add_cdata(pi, start, end - start); } } inline static void next_non_token(PInfo pi) { for (; 1; pi->s++) { switch(*pi->s) { case ' ': case '\t': case '\f': case '\n': case '\r': case '/': case '>': return; default: break; } } } /* Assume the value starts immediately and goes until the quote character is * reached again. Do not read the character after the terminating quote. 
*/ static char* read_quoted_value(PInfo pi) { char *value = 0; if ('"' == *pi->s || ('\'' == *pi->s && StrictEffort != pi->options->effort)) { char term = *pi->s; pi->s++; /* skip quote character */ value = pi->s; for (; *pi->s != term; pi->s++) { if ('\0' == *pi->s) { raise_error("invalid format, document not terminated", pi->str, pi->s); } } *pi->s = '\0'; /* terminate value */ pi->s++; /* move past quote */ } else if (StrictEffort == pi->options->effort) { raise_error("invalid format, expected a quote character", pi->str, pi->s); } else { value = pi->s; next_white(pi); if ('\0' == *pi->s) { raise_error("invalid format, document not terminated", pi->str, pi->s); } *pi->s++ = '\0'; /* terminate value */ } return value; } static char* read_hex_uint64(char *b, uint64_t *up) { uint64_t u = 0; char c; for (; ';' != *b; b++) { c = *b; if ('0' <= c && c <= '9') { u = (u << 4) | (uint64_t)(c - '0'); } else if ('a' <= c && c <= 'f') { u = (u << 4) | (uint64_t)(c - 'a' + 10); } else if ('A' <= c && c <= 'F') { u = (u << 4) | (uint64_t)(c - 'A' + 10); } else { return 0; } } *up = u; return b; } static char* read_10_uint64(char *b, uint64_t *up) { uint64_t u = 0; char c; for (; ';' != *b; b++) { c = *b; if ('0' <= c && c <= '9') { u = (u * 10) + (uint64_t)(c - '0'); } else { return 0; } } *up = u; return b; } /* u0000..u007F 00000000000000xxxxxxx 0xxxxxxx u0080..u07FF 0000000000yyyyyxxxxxx 110yyyyy 10xxxxxx u0800..uD7FF, uE000..uFFFF 00000zzzzyyyyyyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx u10000..u10FFFF uuuzzzzzzyyyyyyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */ static char* ucs_to_utf8_chars(char *text, uint64_t u) { int reading = 0; int i; unsigned char c; if (u <= 0x000000000000007FULL) { /* 0xxxxxxx */ *text++ = (char)u; } else if (u <= 0x00000000000007FFULL) { /* 110yyyyy 10xxxxxx */ *text++ = (char)(0x00000000000000C0ULL | (0x000000000000001FULL & (u >> 6))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u)); } else if (u <= 0x000000000000D7FFULL || 
(0x000000000000E000ULL <= u && u <= 0x000000000000FFFFULL)) { /* 1110zzzz 10yyyyyy 10xxxxxx */ *text++ = (char)(0x00000000000000E0ULL | (0x000000000000000FULL & (u >> 12))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u)); } else if (0x0000000000010000ULL <= u && u <= 0x000000000010FFFFULL) { /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */ *text++ = (char)(0x00000000000000F0ULL | (0x0000000000000007ULL & (u >> 18))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 12))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u)); } else { /* assume it is UTF-8 encoded directly and not UCS */ for (i = 56; 0 <= i; i -= 8) { c = (unsigned char)((u >> i) & 0x00000000000000FFULL); if (reading) { *text++ = (char)c; } else if ('\0' != c) { *text++ = (char)c; reading = 1; } } } return text; } static char* read_coded_chars(PInfo pi, char *text) { char *b, buf[32]; char *end = buf + sizeof(buf) - 1; char *s; for (b = buf, s = pi->s; b < end; b++, s++) { *b = *s; if (';' == *s) { *(b + 1) = '\0'; s++; break; } } if (b > end) { *text++ = *pi->s; } else if ('#' == *buf) { uint64_t u = 0; b = buf + 1; if ('x' == *b || 'X' == *b) { b = read_hex_uint64(b + 1, &u); } else { b = read_10_uint64(b, &u); } if (0 == b) { *text++ = *pi->s; } else { pi->s = s; if (u <= 0x000000000000007FULL) { *text++ = (char)u; #if HAS_PRIVATE_ENCODING } else if (ox_utf8_encoding == pi->options->rb_enc || 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) { #else } else if (ox_utf8_encoding == pi->options->rb_enc) { #endif text = ucs_to_utf8_chars(text, u); #if HAS_PRIVATE_ENCODING } else if (Qnil == pi->options->rb_enc) { #else } else if (0 == pi->options->rb_enc) { #endif pi->options->rb_enc = ox_utf8_encoding; text = ucs_to_utf8_chars(text, u); } 
else { /*raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */ raise_error("Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); } } } else if (0 == strcasecmp(buf, "nbsp;")) { pi->s = s; *text++ = ' '; } else if (0 == strcasecmp(buf, "lt;")) { pi->s = s; *text++ = '<'; } else if (0 == strcasecmp(buf, "gt;")) { pi->s = s; *text++ = '>'; } else if (0 == strcasecmp(buf, "amp;")) { pi->s = s; *text++ = '&'; } else if (0 == strcasecmp(buf, "quot;")) { pi->s = s; *text++ = '"'; } else if (0 == strcasecmp(buf, "apos;")) { pi->s = s; *text++ = '\''; } else { *text++ = *pi->s; } return text; } static int collapse_special(PInfo pi, char *str) { char *s = str; char *b = str; while ('\0' != *s) { if ('&' == *s) { int c; char *end; s++; if ('#' == *s) { uint64_t u = 0; s++; if ('x' == *s || 'X' == *s) { s++; end = read_hex_uint64(s, &u); } else { end = read_10_uint64(s, &u); } if (0 == end) { return EDOM; } if (u <= 0x000000000000007FULL) { *b++ = (char)u; #if HAS_PRIVATE_ENCODING } else if (ox_utf8_encoding == pi->options->rb_enc || 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) { #else } else if (ox_utf8_encoding == pi->options->rb_enc) { #endif b = ucs_to_utf8_chars(b, u); /* TBD support UTF-16 */ #if HAS_PRIVATE_ENCODING } else if (Qnil == pi->options->rb_enc) { #else } else if (0 == pi->options->rb_enc) { #endif pi->options->rb_enc = ox_utf8_encoding; b = ucs_to_utf8_chars(b, u); } else { /* raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/ raise_error("Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); } s = end + 1; } else { if (0 == strncasecmp(s, "lt;", 3)) { c = '<'; s += 3; } else if (0 == strncasecmp(s, "gt;", 3)) { c = '>'; s += 3; } else if (0 == strncasecmp(s, "amp;", 4)) { c = '&'; s += 
4; } else if (0 == strncasecmp(s, "quot;", 5)) { c = '"'; s += 5; } else if (0 == strncasecmp(s, "apos;", 5)) { c = '\''; s += 5; } else { c = '?'; while (';' != *s++) { if ('\0' == *s) { return EDOM; } } s++; } *b++ = (char)c; } } else { *b++ = *s++; } } *b = '\0'; return 0; } ox-1.8.9/ext/ox/cache_test.c0000644000004100000410000000477412111637206015716 0ustar www-datawww-data/* cache_test.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "cache.h" static const char *data[] = { "one", "two", "one", "onex", "oney", "one", "tw", 0 }; void ox_cache_test() { Cache c; const char **d; VALUE v; VALUE *slot = 0;; ox_cache_new(&c); for (d = data; 0 != *d; d++) { /*printf("*** cache_get on %s\n", *d);*/ v = ox_cache_get(c, *d, &slot, 0); if (Qundef == v) { if (0 == slot) { /*printf("*** failed to get a slot for %s\n", *d); */ } else { /*printf("*** added '%s' to cache\n", *d); */ v = ID2SYM(rb_intern(*d)); *slot = v; } } else { VALUE rs = rb_funcall2(v, rb_intern("to_s"), 0, 0); printf("*** get on '%s' returned '%s' (%s)\n", *d, StringValuePtr(rs), rb_class2name(rb_obj_class(v))); } /*ox_cache_print(c);*/ } ox_cache_print(c); } ox-1.8.9/ext/ox/base64.h0000644000004100000410000000360212111637206014672 0ustar www-datawww-data/* base64.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __BASE64_H__ #define __BASE64_H__ typedef unsigned char uchar; #define b64_size(len) ((len + 2) / 3 * 4) extern unsigned long b64_orig_size(const char *text); extern void to_base64(const uchar *src, int len, char *b64); extern void from_base64(const char *b64, uchar *str); #endif /* __BASE64_H__ */ ox-1.8.9/ext/ox/base64.c0000644000004100000410000001073312111637206014670 0ustar www-datawww-data/* base64.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "base64.h" static char digits[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /* invalid or terminating characters are set to 'X' or \x58 */ static uchar s_digits[256] = "\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x3E\x58\x58\x58\x3F\ \x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x58\x58\x58\x58\x58\x58\ \x58\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\ \x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x58\x58\x58\x58\x58\ \x58\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\ \x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58"; void to_base64(const uchar *src, int len, char *b64) { const uchar *end3; int len3 = len % 3; uchar b1, b2, b3; end3 = src + (len - len3); while (src < end3) { b1 = *src++; b2 = *src++; b3 
= *src++; *b64++ = digits[(uchar)(b1 >> 2)]; *b64++ = digits[(uchar)(((b1 & 0x03) << 4) | (b2 >> 4))]; *b64++ = digits[(uchar)(((b2 & 0x0F) << 2) | (b3 >> 6))]; *b64++ = digits[(uchar)(b3 & 0x3F)]; } if (1 == len3) { b1 = *src++; *b64++ = digits[b1 >> 2]; *b64++ = digits[(b1 & 0x03) << 4]; *b64++ = '='; *b64++ = '='; } else if (2 == len3) { b1 = *src++; b2 = *src++; *b64++ = digits[b1 >> 2]; *b64++ = digits[((b1 & 0x03) << 4) | (b2 >> 4)]; *b64++ = digits[(b2 & 0x0F) << 2]; *b64++ = '='; } *b64 = '\0'; } unsigned long b64_orig_size(const char *text) { const char *start = text; unsigned long size = 0; if ('\0' != *text) { for (; 0 != *text; text++) { } size = (text - start) * 3 / 4; text--; if ('=' == *text) { size--; text--; if ('=' == *text) { size--; } } } return size; } void from_base64(const char *b64, uchar *str) { uchar b0, b1, b2, b3; while (1) { if ('X' == (b0 = s_digits[(uchar)*b64++])) { break; } if ('X' == (b1 = s_digits[(uchar)*b64++])) { break; } *str++ = (b0 << 2) | ((b1 >> 4) & 0x03); if ('X' == (b2 = s_digits[(uchar)*b64++])) { break; } *str++ = (b1 << 4) | ((b2 >> 2) & 0x0F); if ('X' == (b3 = s_digits[(uchar)*b64++])) { break; } *str++ = (b2 << 6) | b3; } *str = '\0'; } ox-1.8.9/ext/ox/dump.c0000644000004100000410000010571012111637206014551 0ustar www-datawww-data/* dump.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include "base64.h" #include "cache8.h" #include "ox.h" #define USE_B64 0 typedef unsigned long ulong; typedef struct _Str { const char *str; size_t len; } *Str; typedef struct _Element { struct _Str clas; struct _Str attr; unsigned long id; int indent; /* < 0 indicates no \n */ int closed; char type; } *Element; typedef struct _Out { void (*w_start)(struct _Out *out, Element e); void (*w_end)(struct _Out *out, Element e); void (*w_time)(struct _Out *out, VALUE obj); char *buf; char *end; char *cur; Cache8 circ_cache; unsigned long circ_cnt; int indent; int depth; /* used by dumpHash */ Options opts; VALUE obj; } *Out; static void dump_obj_to_xml(VALUE obj, Options copts, Out out); static void dump_first_obj(VALUE obj, Out out); static void dump_obj(ID aid, VALUE obj, unsigned int depth, Out out); static void dump_gen_doc(VALUE obj, unsigned int depth, Out out); static void dump_gen_element(VALUE obj, unsigned int depth, Out out); static void dump_gen_instruct(VALUE 
obj, unsigned int depth, Out out); static int dump_gen_attr(VALUE key, VALUE value, Out out); static int dump_gen_nodes(VALUE obj, unsigned int depth, Out out); static void dump_gen_val_node(VALUE obj, unsigned int depth, const char *pre, size_t plen, const char *suf, size_t slen, Out out); static void dump_start(Out out, Element e); static void dump_end(Out out, Element e); static void grow(Out out, size_t len); static void dump_value(Out out, const char *value, size_t size); static void dump_str_value(Out out, const char *value, size_t size); static int dump_var(ID key, VALUE value, Out out); static void dump_num(Out out, VALUE obj); static void dump_date(Out out, VALUE obj); static void dump_time_thin(Out out, VALUE obj); static void dump_time_xsd(Out out, VALUE obj); static int dump_hash(VALUE key, VALUE value, Out out); static int is_xml_friendly(const uchar *str, int len); static const char hex_chars[17] = "0123456789abcdef"; static char xml_friendly_chars[256] = "\ 88888888811881888888888888888888\ 11611156111111111111111111114141\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111"; inline static int is_xml_friendly(const uchar *str, int len) { for (; 0 < len; str++, len--) { if ('1' != xml_friendly_chars[*str]) { return 0; } } return 1; } inline static size_t xml_str_len(const uchar *str, size_t len) { size_t size = 0; for (; 0 < len; str++, len--) { size += xml_friendly_chars[*str]; } return size - len * (size_t)'0'; } inline static void dump_hex(uchar c, Out out) { uchar d = (c >> 4) & 0x0F; *out->cur++ = hex_chars[d]; d = c & 0x0F; *out->cur++ = hex_chars[d]; } static Type obj_class_code(VALUE obj) { VALUE clas = rb_obj_class(obj); switch (rb_type(obj)) { case T_NIL: return NilClassCode; case T_ARRAY: return ArrayCode; case T_HASH: return HashCode; case T_TRUE: return TrueClassCode; case T_FALSE: return 
FalseClassCode; case T_FIXNUM: return FixnumCode; case T_FLOAT: return FloatCode; case T_STRING: return (is_xml_friendly((uchar*)StringValuePtr(obj), (int)RSTRING_LEN(obj))) ? StringCode : String64Code; case T_SYMBOL: { const char *sym = rb_id2name(SYM2ID(obj)); return (is_xml_friendly((uchar*)sym, (int)strlen(sym))) ? SymbolCode : Symbol64Code; } case T_DATA: return (rb_cTime == clas) ? TimeCode : ((ox_date_class == clas) ? DateCode : 0); case T_STRUCT: return (rb_cRange == clas) ? RangeCode : StructCode; case T_OBJECT: return (ox_document_clas == clas || ox_element_clas == clas) ? RawCode : ObjectCode; case T_REGEXP: return RegexpCode; case T_BIGNUM: return BignumCode; #ifdef T_COMPLEX case T_COMPLEX: return ComplexCode; #endif #ifdef T_RATIONAL case T_RATIONAL: return RationalCode; #endif case T_CLASS: return ClassCode; default: return 0; } } inline static void fill_indent(Out out, int cnt) { if (0 <= cnt) { *out->cur++ = '\n'; for (; 0 < cnt; cnt--) { *out->cur++ = ' '; } } } inline static void fill_value(Out out, const char *value, size_t len) { if (6 < len) { memcpy(out->cur, value, len); out->cur += len; } else { for (; '\0' != *value; value++) { *out->cur++ = *value; } } } inline static void fill_attr(Out out, char name, const char *value, size_t len) { *out->cur++ = ' '; *out->cur++ = name; *out->cur++ = '='; *out->cur++ = '"'; if (6 < len) { memcpy(out->cur, value, len); out->cur += len; } else { for (; '\0' != *value; value++) { *out->cur++ = *value; } } *out->cur++ = '"'; } inline static const char* ulong2str(ulong num, char *end) { char *b; *end-- = '\0'; for (b = end; 0 < num || b == end; num /= 10, b--) { *b = (num % 10) + '0'; } b++; return b; } static int check_circular(Out out, VALUE obj, Element e) { slot_t *slot; slot_t id; int result; if (0 == (id = ox_cache8_get(out->circ_cache, obj, &slot))) { out->circ_cnt++; id = out->circ_cnt; *slot = id; e->id = id; result = 0; } else { e->type = RefCode; e->clas.len = 0; e->clas.str = 0; e->closed = 1; 
e->id = id; out->w_start(out, e); result = 1; } return result; } static void grow(Out out, size_t len) { size_t size = out->end - out->buf; long pos = out->cur - out->buf; size *= 2; if (size <= len * 2 + pos) { size += len; } REALLOC_N(out->buf, char, size + 10); /* 10 extra for terminator character plus extra (paranoid) */ out->end = out->buf + size; out->cur = out->buf + pos; } static void dump_start(Out out, Element e) { size_t size = e->indent + 4; if (0 < e->attr.len) { /* a="attr" */ size += e->attr.len + 5; } if (0 < e->clas.len) { /* c="class" */ size += e->clas.len + 5; } if (0 < e->id) { /* i="id" */ size += 24; /* over estimate, 19 digits */ } if (out->end - out->cur <= (long)size) { grow(out, size); } if (out->buf < out->cur) { fill_indent(out, e->indent); } *out->cur++ = '<'; *out->cur++ = e->type; if (0 < e->attr.len) { fill_attr(out, 'a', e->attr.str, e->attr.len); } if ((ObjectCode == e->type || ExceptionCode == e->type || StructCode == e->type || ClassCode == e->type) && 0 < e->clas.len) { fill_attr(out, 'c', e->clas.str, e->clas.len); } if (0 < e->id) { char buf[32]; char *end = buf + sizeof(buf) - 1; const char *s = ulong2str(e->id, end); fill_attr(out, 'i', s, end - s); } if (e->closed) { *out->cur++ = '/'; } *out->cur++ = '>'; *out->cur = '\0'; } static void dump_end(Out out, Element e) { size_t size = e->indent + 5; if (out->end - out->cur <= (long)size) { grow(out, size); } fill_indent(out, e->indent); *out->cur++ = '<'; *out->cur++ = '/'; *out->cur++ = e->type; *out->cur++ = '>'; *out->cur = '\0'; } inline static void dump_value(Out out, const char *value, size_t size) { if (out->end - out->cur <= (long)size) { grow(out, size); } if (6 < size) { memcpy(out->cur, value, size); out->cur += size; } else { for (; '\0' != *value; value++) { *out->cur++ = *value; } } *out->cur = '\0'; } inline static void dump_str_value(Out out, const char *value, size_t size) { size_t xsize = xml_str_len((const uchar*)value, size); if (out->end - out->cur <= 
(long)xsize) { grow(out, xsize); } for (; '\0' != *value; value++) { if ('1' == xml_friendly_chars[(uchar)*value]) { *out->cur++ = *value; } else { *out->cur++ = '&'; switch (*value) { case '"': *out->cur++ = 'q'; *out->cur++ = 'u'; *out->cur++ = 'o'; *out->cur++ = 't'; break; case '&': *out->cur++ = 'a'; *out->cur++ = 'm'; *out->cur++ = 'p'; break; case '\'': *out->cur++ = 'a'; *out->cur++ = 'p'; *out->cur++ = 'o'; *out->cur++ = 's'; break; case '<': *out->cur++ = 'l'; *out->cur++ = 't'; break; case '>': *out->cur++ = 'g'; *out->cur++ = 't'; break; default: *out->cur++ = '#'; *out->cur++ = 'x'; *out->cur++ = '0'; *out->cur++ = '0'; dump_hex(*value, out); break; } *out->cur++ = ';'; } } *out->cur = '\0'; } inline static void dump_num(Out out, VALUE obj) { char buf[32]; char *b = buf + sizeof(buf) - 1; long num = NUM2LONG(obj); int neg = 0; if (0 > num) { neg = 1; num = -num; } *b-- = '\0'; if (0 < num) { for (; 0 < num; num /= 10, b--) { *b = (num % 10) + '0'; } if (neg) { *b = '-'; } else { b++; } } else { *b = '0'; } if (out->end - out->cur <= (long)(sizeof(buf) - (b - buf))) { grow(out, sizeof(buf) - (b - buf)); } for (; '\0' != *b; b++) { *out->cur++ = *b; } *out->cur = '\0'; } static void dump_time_thin(Out out, VALUE obj) { char buf[64]; char *b = buf + sizeof(buf) - 1; #if HAS_RB_TIME_TIMESPEC struct timespec ts = rb_time_timespec(obj); time_t sec = ts.tv_sec; long nsec = ts.tv_nsec; #else time_t sec = NUM2LONG(rb_funcall2(obj, ox_tv_sec_id, 0, 0)); #if HAS_NANO_TIME long nsec = NUM2LONG(rb_funcall2(obj, ox_tv_nsec_id, 0, 0)); #else long nsec = NUM2LONG(rb_funcall2(obj, ox_tv_usec_id, 0, 0)) * 1000; #endif #endif char *dot = b - 10; long size; *b-- = '\0'; for (; dot < b; b--, nsec /= 10) { *b = '0' + (nsec % 10); } *b-- = '.'; for (; 0 < sec; b--, sec /= 10) { *b = '0' + (sec % 10); } b++; size = sizeof(buf) - (b - buf) - 1; if (out->end - out->cur <= size) { grow(out, size); } memcpy(out->cur, b, size); out->cur += size; } static void dump_date(Out out, 
VALUE obj) { char buf[64]; char *b = buf + sizeof(buf) - 1; long jd = NUM2LONG(rb_funcall2(obj, ox_jd_id, 0, 0)); long size; *b-- = '\0'; for (; 0 < jd; b--, jd /= 10) { *b = '0' + (jd % 10); } b++; if ('\0' == *b) { b--; *b = '0'; } size = sizeof(buf) - (b - buf) - 1; if (out->end - out->cur <= size) { grow(out, size); } memcpy(out->cur, b, size); out->cur += size; } static void dump_time_xsd(Out out, VALUE obj) { struct tm *tm; #if HAS_RB_TIME_TIMESPEC struct timespec ts = rb_time_timespec(obj); time_t sec = ts.tv_sec; long nsec = ts.tv_nsec; #else time_t sec = NUM2LONG(rb_funcall2(obj, ox_tv_sec_id, 0, 0)); #if HAS_NANO_TIME long nsec = NUM2LONG(rb_funcall2(obj, ox_tv_nsec_id, 0, 0)); #else long nsec = NUM2LONG(rb_funcall2(obj, ox_tv_usec_id, 0, 0)) * 1000; #endif #endif int tzhour, tzmin; char tzsign = '+'; if (out->end - out->cur <= 33) { grow(out, 33); } /* 2010-07-09T10:47:45.895826+09:00 */ tm = localtime(&sec); #if HAS_TM_GMTOFF if (0 > tm->tm_gmtoff) { tzsign = '-'; tzhour = (int)(tm->tm_gmtoff / -3600); tzmin = (int)(tm->tm_gmtoff / -60) - (tzhour * 60); } else { tzhour = (int)(tm->tm_gmtoff / 3600); tzmin = (int)(tm->tm_gmtoff / 60) - (tzhour * 60); } #else tzhour = 0; tzmin = 0; #endif /* TBD replace with more efficient printer */ out->cur += sprintf(out->cur, "%04d-%02d-%02dT%02d:%02d:%02d.%06ld%c%02d:%02d", tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, nsec / 1000, tzsign, tzhour, tzmin); } static void dump_first_obj(VALUE obj, Out out) { char buf[128]; Options copts = out->opts; int cnt; if (Yes == copts->with_xml) { if ('\0' == *copts->encoding) { dump_value(out, "", 21); } else { cnt = sprintf(buf, "", copts->encoding); dump_value(out, buf, cnt); } } if (Yes == copts->with_instruct) { cnt = sprintf(buf, "%s", (out->buf < out->cur) ? "\n" : "", (Yes == copts->circular) ? " circular=\"yes\"" : ((No == copts->circular) ? " circular=\"no\"" : ""), (Yes == copts->xsd_date) ? 
" xsd_date=\"yes\"" : ((No == copts->xsd_date) ? " xsd_date=\"no\"" : "")); dump_value(out, buf, cnt); } if (Yes == copts->with_dtd) { cnt = sprintf(buf, "%s", (out->buf < out->cur) ? "\n" : "", obj_class_code(obj)); dump_value(out, buf, cnt); } dump_obj(0, obj, 0, out); } static void dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) { struct _Element e; VALUE prev_obj = out->obj; char value_buf[64]; int cnt; out->obj = obj; if (0 == aid) { /*e.attr.str = 0; */ e.attr.len = 0; } else { e.attr.str = rb_id2name(aid); e.attr.len = strlen(e.attr.str); } e.closed = 0; if (0 == depth) { e.indent = (0 <= out->indent) ? 0 : -1; } else if (0 > out->indent) { e.indent = -1; } else if (0 == out->indent) { e.indent = 0; } else { e.indent = depth * out->indent; } e.id = 0; e.clas.len = 0; e.clas.str = 0; switch (rb_type(obj)) { case T_NIL: e.type = NilClassCode; e.closed = 1; out->w_start(out, &e); break; case T_ARRAY: if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } cnt = (int)RARRAY_LEN(obj); e.type = ArrayCode; e.closed = (0 >= cnt); out->w_start(out, &e); if (!e.closed) { VALUE *np = RARRAY_PTR(obj); int i; int d2 = depth + 1; for (i = cnt; 0 < i; i--, np++) { dump_obj(0, *np, d2, out); } out->w_end(out, &e); } break; case T_HASH: if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } cnt = (int)RHASH_SIZE(obj); e.type = HashCode; e.closed = (0 >= cnt); out->w_start(out, &e); if (0 < cnt) { unsigned int od = out->depth; out->depth = depth + 1; rb_hash_foreach(obj, dump_hash, (VALUE)out); out->depth = od; out->w_end(out, &e); } break; case T_TRUE: e.type = TrueClassCode; e.closed = 1; out->w_start(out, &e); break; case T_FALSE: e.type = FalseClassCode; e.closed = 1; out->w_start(out, &e); break; case T_FIXNUM: e.type = FixnumCode; out->w_start(out, &e); dump_num(out, obj); e.indent = -1; out->w_end(out, &e); break; case T_FLOAT: e.type = FloatCode; cnt = sprintf(value_buf, "%0.16g", rb_num2dbl(obj)); /* used sprintf due to bug in 
snprintf */ out->w_start(out, &e); dump_value(out, value_buf, cnt); e.indent = -1; out->w_end(out, &e); break; case T_STRING: { const char *str; if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } str = StringValuePtr(obj); cnt = (int)RSTRING_LEN(obj); #if USE_B64 if (is_xml_friendly((uchar*)str, cnt)) { e.type = StringCode; out->w_start(out, &e); dump_str_value(out, str, cnt); e.indent = -1; out->w_end(out, &e); } else { ulong size = b64_size(cnt); char *b64 = ALLOCA_N(char, size + 1); e.type = String64Code; to_base64((uchar*)str, cnt, b64); out->w_start(out, &e); dump_value(out, b64, size); e.indent = -1; out->w_end(out, &e); } #else e.type = StringCode; out->w_start(out, &e); dump_str_value(out, str, cnt); e.indent = -1; out->w_end(out, &e); #endif break; } case T_SYMBOL: { const char *sym = rb_id2name(SYM2ID(obj)); cnt = (int)strlen(sym); #if USE_B64 if (is_xml_friendly((uchar*)sym, cnt)) { e.type = SymbolCode; out->w_start(out, &e); dump_str_value(out, sym, cnt); e.indent = -1; out->w_end(out, &e); } else { ulong size = b64_size(cnt); char *b64 = ALLOCA_N(char, size + 1); e.type = Symbol64Code; to_base64((uchar*)sym, cnt, b64); out->w_start(out, &e); dump_value(out, b64, size); e.indent = -1; out->w_end(out, &e); } #else e.type = SymbolCode; out->w_start(out, &e); dump_str_value(out, sym, cnt); e.indent = -1; out->w_end(out, &e); #endif break; } case T_DATA: { VALUE clas; clas = rb_obj_class(obj); if (rb_cTime == clas) { e.type = TimeCode; out->w_start(out, &e); out->w_time(out, obj); e.indent = -1; out->w_end(out, &e); } else { const char *classname = rb_class2name(clas); if (0 == strcmp("Date", classname)) { e.type = DateCode; out->w_start(out, &e); dump_date(out, obj); e.indent = -1; out->w_end(out, &e); } else { if (StrictEffort == out->opts->effort) { rb_raise(rb_eNotImpError, "Failed to dump T_DATA %s\n", classname); } else { e.type = NilClassCode; e.closed = 1; out->w_start(out, &e); } } } break; } case T_STRUCT: { #if HAS_RSTRUCT VALUE 
clas; if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } clas = rb_obj_class(obj); if (rb_cRange == clas) { VALUE beg = RSTRUCT_PTR(obj)[0]; VALUE end = RSTRUCT_PTR(obj)[1]; VALUE excl = RSTRUCT_PTR(obj)[2]; int d2 = depth + 1; e.type = RangeCode; e.clas.len = 5; e.clas.str = "Range"; out->w_start(out, &e); dump_obj(ox_beg_id, beg, d2, out); dump_obj(ox_end_id, end, d2, out); dump_obj(ox_excl_id, excl, d2, out); out->w_end(out, &e); } else { char num_buf[16]; VALUE *vp; int i; int d2 = depth + 1; e.type = StructCode; e.clas.str = rb_class2name(clas); e.clas.len = strlen(e.clas.str); out->w_start(out, &e); cnt = (int)RSTRUCT_LEN(obj); for (i = 0, vp = RSTRUCT_PTR(obj); i < cnt; i++, vp++) { dump_obj(rb_intern(ulong2str(i, num_buf + sizeof(num_buf) - 1)), *vp, d2, out); } out->w_end(out, &e); } #else e.type = NilClassCode; e.closed = 1; out->w_start(out, &e); #endif break; } case T_OBJECT: { VALUE clas; if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } clas = rb_obj_class(obj); e.clas.str = rb_class2name(clas); e.clas.len = strlen(e.clas.str); if (ox_document_clas == clas) { e.type = RawCode; out->w_start(out, &e); dump_gen_doc(obj, depth + 1, out); out->w_end(out, &e); } else if (ox_element_clas == clas) { e.type = RawCode; out->w_start(out, &e); dump_gen_element(obj, depth + 1, out); out->w_end(out, &e); } else { /* Object */ #if HAS_IVAR_HELPERS e.type = (Qtrue == rb_obj_is_kind_of(obj, rb_eException)) ? ExceptionCode : ObjectCode; cnt = (int)rb_ivar_count(obj); e.closed = (0 >= cnt); out->w_start(out, &e); if (0 < cnt) { unsigned int od = out->depth; out->depth = depth + 1; rb_ivar_foreach(obj, dump_var, (VALUE)out); out->depth = od; out->w_end(out, &e); } #else /*VALUE vars = rb_obj_instance_variables(obj); */ /*#else */ VALUE vars = rb_funcall2(obj, rb_intern("instance_variables"), 0, 0); /*#endif */ e.type = (Qtrue == rb_obj_is_kind_of(obj, rb_eException)) ? 
ExceptionCode : ObjectCode; cnt = (int)RARRAY_LEN(vars); e.closed = (0 >= cnt); out->w_start(out, &e); if (0 < cnt) { VALUE *np = RARRAY_PTR(vars); ID vid; unsigned int od = out->depth; int i; out->depth = depth + 1; for (i = cnt; 0 < i; i--, np++) { vid = rb_to_id(*np); dump_var(vid, rb_ivar_get(obj, vid), out); } out->depth = od; out->w_end(out, &e); } #endif } break; } case T_REGEXP: { #if 1 VALUE rs = rb_funcall2(obj, ox_inspect_id, 0, 0); const char *s = StringValuePtr(rs); cnt = (int)RSTRING_LEN(rs); #else const char *s = RREGEXP_SRC_PTR(obj); int options = rb_reg_options(obj); cnt = (int)RREGEXP_SRC_LEN(obj); #endif e.type = RegexpCode; out->w_start(out, &e); #if USE_B64 if (is_xml_friendly((uchar*)s, cnt)) { /*dump_value(out, "/", 1); */ dump_str_value(out, s, cnt); } else { ulong size = b64_size(cnt); char *b64 = ALLOCA_N(char, size + 1); to_base64((uchar*)s, cnt, b64); dump_value(out, b64, size); } #else dump_str_value(out, s, cnt); #endif #if 0 dump_value(out, "/", 1); if (0 != (ONIG_OPTION_MULTILINE & options)) { dump_value(out, "m", 1); } if (0 != (ONIG_OPTION_IGNORECASE & options)) { dump_value(out, "i", 1); } if (0 != (ONIG_OPTION_EXTEND & options)) { dump_value(out, "x", 1); } #endif e.indent = -1; out->w_end(out, &e); break; } case T_BIGNUM: { VALUE rs = rb_big2str(obj, 10); e.type = BignumCode; out->w_start(out, &e); dump_value(out, StringValuePtr(rs), RSTRING_LEN(rs)); e.indent = -1; out->w_end(out, &e); break; } #ifdef T_COMPLEX case T_COMPLEX: e.type = ComplexCode; out->w_start(out, &e); #ifdef RCOMPLEX dump_obj(0, RCOMPLEX(obj)->real, depth + 1, out); dump_obj(0, RCOMPLEX(obj)->imag, depth + 1, out); #else dump_obj(0, rb_funcall2(obj, rb_intern("real"), 0, 0), depth + 1, out); dump_obj(0, rb_funcall2(obj, rb_intern("imag"), 0, 0), depth + 1, out); #endif out->w_end(out, &e); break; #endif #ifdef T_RATIONAL case T_RATIONAL: e.type = RationalCode; out->w_start(out, &e); #ifdef RRATIONAL dump_obj(0, RRATIONAL(obj)->num, depth + 1, out); 
dump_obj(0, RRATIONAL(obj)->den, depth + 1, out); #else dump_obj(0, rb_funcall2(obj, rb_intern("numerator"), 0, 0), depth + 1, out); dump_obj(0, rb_funcall2(obj, rb_intern("denominator"), 0, 0), depth + 1, out); #endif out->w_end(out, &e); break; #endif case T_CLASS: { e.type = ClassCode; e.clas.str = rb_class2name(obj); e.clas.len = strlen(e.clas.str); e.closed = 1; out->w_start(out, &e); break; } default: if (StrictEffort == out->opts->effort) { rb_raise(rb_eNotImpError, "Failed to dump %s Object (%02x)\n", rb_obj_classname(obj), rb_type(obj)); } else { e.type = NilClassCode; e.closed = 1; out->w_start(out, &e); } break; } out->obj = prev_obj; } static int dump_var(ID key, VALUE value, Out out) { if (T_DATA == rb_type(value) && key == ox_mesg_id) { /* There is a secret recipe that keeps Exception mesg attributes as a * T_DATA until it is needed. The safe way around this hack is to call * the message() method and use the returned string as the * message. Not pretty but it solves the most common use of this * hack. If there are others they will have to be handled one at a * time. 
*/ value = rb_funcall(out->obj, ox_message_id, 0); } dump_obj(key, value, out->depth, out); return ST_CONTINUE; } static int dump_hash(VALUE key, VALUE value, Out out) { dump_obj(0, key, out->depth, out); dump_obj(0, value, out->depth, out); return ST_CONTINUE; } static void dump_gen_doc(VALUE obj, unsigned int depth, Out out) { VALUE attrs = rb_attr_get(obj, ox_attributes_id); VALUE nodes = rb_attr_get(obj, ox_nodes_id); if ('\0' == *out->opts->encoding && Qnil != attrs) { VALUE renc = rb_hash_lookup(attrs, ox_encoding_sym); if (Qnil != renc) { const char *enc = StringValuePtr(renc); strncpy(out->opts->encoding, enc, sizeof(out->opts->encoding) - 1); } } if (Yes == out->opts->with_xml) { dump_value(out, "", 2); } if (Yes == out->opts->with_instruct) { if (out->buf < out->cur) { dump_value(out, "\n", 36); } else { dump_value(out, "", 35); } } if (Qnil != nodes) { dump_gen_nodes(nodes, depth, out); } } static void dump_gen_element(VALUE obj, unsigned int depth, Out out) { VALUE rname = rb_attr_get(obj, ox_at_value_id); VALUE attrs = rb_attr_get(obj, ox_attributes_id); VALUE nodes = rb_attr_get(obj, ox_nodes_id); const char *name = StringValuePtr(rname); long nlen = RSTRING_LEN(rname); size_t size; int indent; if (0 > out->indent) { indent = -1; } else if (0 == out->indent) { indent = 0; } else { indent = depth * out->indent; } size = indent + 4 + nlen; if (out->end - out->cur <= (long)size) { grow(out, size); } fill_indent(out, indent); *out->cur++ = '<'; fill_value(out, name, nlen); if (Qnil != attrs) { rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out); } if (Qnil != nodes) { int do_indent; *out->cur++ = '>'; do_indent = dump_gen_nodes(nodes, depth, out); if (out->end - out->cur <= (long)size) { grow(out, size); } if (do_indent) { fill_indent(out, indent); } *out->cur++ = '<'; *out->cur++ = '/'; fill_value(out, name, nlen); } else { *out->cur++ = '/'; } *out->cur++ = '>'; *out->cur = '\0'; } static void dump_gen_instruct(VALUE obj, unsigned int depth, Out out) { 
VALUE rname = rb_attr_get(obj, ox_at_value_id); VALUE attrs = rb_attr_get(obj, ox_attributes_id); VALUE rcontent = rb_attr_get(obj, ox_at_content_id); const char *name = StringValuePtr(rname); const char *content = 0; long nlen = RSTRING_LEN(rname); long clen = 0; size_t size; if (T_STRING == rb_type(rcontent)) { content = StringValuePtr(rcontent); clen = RSTRING_LEN(rcontent); size = 4 + nlen + clen; } else { size = 4 + nlen; } if (out->end - out->cur <= (long)size) { grow(out, size); } *out->cur++ = '<'; *out->cur++ = '?'; fill_value(out, name, nlen); if (0 != content) { fill_value(out, content, clen); } else if (Qnil != attrs) { rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out); } *out->cur++ = '?'; *out->cur++ = '>'; *out->cur = '\0'; } static int dump_gen_nodes(VALUE obj, unsigned int depth, Out out) { long cnt = RARRAY_LEN(obj); int indent_needed = 1; if (0 < cnt) { VALUE *np = RARRAY_PTR(obj); VALUE clas; int d2 = depth + 1; for (; 0 < cnt; cnt--, np++) { clas = rb_obj_class(*np); if (ox_element_clas == clas) { dump_gen_element(*np, d2, out); } else if (ox_instruct_clas == clas) { dump_gen_instruct(*np, d2, out); indent_needed = (1 == cnt) ? 0 : 1; } else if (rb_cString == clas) { dump_str_value(out, StringValuePtr(*np), RSTRING_LEN(*np)); indent_needed = (1 == cnt) ? 0 : 1; } else if (ox_comment_clas == clas) { dump_gen_val_node(*np, d2, "", 4, out); } else if (ox_cdata_clas == clas) { dump_gen_val_node(*np, d2, "", 3, out); } else if (ox_doctype_clas == clas) { dump_gen_val_node(*np, d2, "", 2, out); } else { rb_raise(rb_eTypeError, "Unexpected class, %s, while dumping generic XML\n", rb_class2name(clas)); } } } return indent_needed; } static int dump_gen_attr(VALUE key, VALUE value, Out out) { #if HAS_PRIVATE_ENCODING // There seems to be a bug in jruby for converting symbols to strings and preserving the encoding. This is a work // around. const char *ks = rb_str_ptr(rb_String(key)); #else const char *ks = (T_SYMBOL == rb_type(key)) ? 
rb_id2name(SYM2ID(key)) : StringValuePtr(key); #endif size_t klen = strlen(ks); size_t size = 4 + klen + RSTRING_LEN(value); if (out->end - out->cur <= (long)size) { grow(out, size); } *out->cur++ = ' '; fill_value(out, ks, klen); *out->cur++ = '='; *out->cur++ = '"'; dump_str_value(out, StringValuePtr(value), RSTRING_LEN(value)); *out->cur++ = '"'; return ST_CONTINUE; } static void dump_gen_val_node(VALUE obj, unsigned int depth, const char *pre, size_t plen, const char *suf, size_t slen, Out out) { VALUE v = rb_attr_get(obj, ox_at_value_id); const char *val; size_t vlen; size_t size; int indent; if (T_STRING != rb_type(v)) { return; } val = StringValuePtr(v); vlen = RSTRING_LEN(v); if (0 > out->indent) { indent = -1; } else if (0 == out->indent) { indent = 0; } else { indent = depth * out->indent; } size = indent + plen + slen + vlen; if (out->end - out->cur <= (long)size) { grow(out, size); } fill_indent(out, indent); fill_value(out, pre, plen); fill_value(out, val, vlen); fill_value(out, suf, slen); *out->cur = '\0'; } static void dump_obj_to_xml(VALUE obj, Options copts, Out out) { VALUE clas = rb_obj_class(obj); out->w_time = (Yes == copts->xsd_date) ? 
dump_time_xsd : dump_time_thin; out->buf = ALLOC_N(char, 65336); out->end = out->buf + 65325; /* 10 less than end plus extra for possible errors */ out->cur = out->buf; out->circ_cache = 0; out->circ_cnt = 0; out->opts = copts; out->obj = obj; if (Yes == copts->circular) { ox_cache8_new(&out->circ_cache); } out->indent = copts->indent; if (ox_document_clas == clas) { dump_gen_doc(obj, -1, out); } else if (ox_element_clas == clas) { dump_gen_element(obj, 0, out); } else { out->w_start = dump_start; out->w_end = dump_end; dump_first_obj(obj, out); } dump_value(out, "\n", 1); if (Yes == copts->circular) { ox_cache8_delete(out->circ_cache); } } char* ox_write_obj_to_str(VALUE obj, Options copts) { struct _Out out; dump_obj_to_xml(obj, copts, &out); return out.buf; } void ox_write_obj_to_file(VALUE obj, const char *path, Options copts) { struct _Out out; size_t size; FILE *f; dump_obj_to_xml(obj, copts, &out); size = out.cur - out.buf; if (0 == (f = fopen(path, "w"))) { rb_raise(rb_eIOError, "%s\n", strerror(errno)); } if (size != fwrite(out.buf, 1, size, f)) { int err = ferror(f); rb_raise(rb_eIOError, "Write failed. [%d:%s]\n", err, strerror(err)); } xfree(out.buf); fclose(f); } ox-1.8.9/ext/ox/ox.h0000644000004100000410000002012212111637206014230 0ustar www-datawww-data/* ox.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_H__ #define __OX_H__ #if defined(__cplusplus) extern "C" { #if 0 } /* satisfy cc-mode */ #endif #endif #define RSTRING_NOT_MODIFIED #include "ruby.h" #if HAS_ENCODING_SUPPORT #include "ruby/encoding.h" #endif #ifdef RUBINIUS_RUBY #undef T_COMPLEX enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK}; #else #if HAS_TOP_LEVEL_ST_H /* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. 
*/ #include "st.h" #else #include "ruby/st.h" #endif #endif #include "cache.h" #define raise_error(msg, xml, current) _ox_raise_error(msg, xml, current, __FILE__, __LINE__) #define MAX_TEXT_LEN 4096 #define MAX_ATTRS 1024 #define MAX_DEPTH 1024 #define SILENT 0 #define TRACE 1 #define DEBUG 2 #define XSD_DATE 0x0001 #define WITH_XML 0x0002 #define WITH_INST 0x0004 #define WITH_DTD 0x0008 #define CIRCULAR 0x0010 #define XSD_DATE_SET 0x0100 #define WITH_XML_SET 0x0200 #define WITH_INST_SET 0x0400 #define WITH_DTD_SET 0x0800 #define CIRCULAR_SET 0x1000 typedef enum { UseObj = 1, UseAttr = 2, UseAttrSet = 3, UseArray = 4, UseAMember = 5, UseHash = 6, UseHashKey = 7, UseHashVal = 8, UseRange = 9, UseRangeAttr= 10, UseRaw = 11, } Use; typedef enum { StrictEffort = 's', TolerantEffort = 't', AutoEffort = 'a', NoEffort = 0, } Effort; typedef enum { Yes = 'y', No = 'n', NotSet = 0 } YesNo; typedef enum { ObjMode = 'o', GenMode = 'g', LimMode = 'l', NoMode = 0 } LoadMode; typedef enum { NoCode = 0, ArrayCode = 'a', String64Code = 'b', /* base64 encoded String */ ClassCode = 'c', Symbol64Code = 'd', /* base64 encoded Symbol */ DateCode = 'D', ExceptionCode = 'e', FloatCode = 'f', RegexpCode = 'g', HashCode = 'h', FixnumCode = 'i', BignumCode = 'j', KeyCode = 'k', /* indicates the value is a hash key, kind of a hack */ RationalCode = 'l', SymbolCode = 'm', FalseClassCode = 'n', ObjectCode = 'o', RefCode = 'p', RangeCode = 'r', StringCode = 's', TimeCode = 't', StructCode = 'u', ComplexCode = 'v', RawCode = 'x', TrueClassCode = 'y', NilClassCode = 'z', } Type; typedef struct _Attr { const char *name; const char *value; } *Attr; typedef struct _Helper { ID var; /* Object var ID */ VALUE obj; /* object created or Qundef if not appropriate */ Type type; /* type of object in obj */ } *Helper; typedef struct _PInfo *PInfo; typedef struct _ParseCallbacks { void (*instruct)(PInfo pi, const char *target, Attr attrs, const char *content); void (*add_doctype)(PInfo pi, const char 
*docType); void (*add_comment)(PInfo pi, const char *comment); void (*add_cdata)(PInfo pi, const char *cdata, size_t len); void (*add_text)(PInfo pi, char *text, int closed); void (*add_element)(PInfo pi, const char *ename, Attr attrs, int hasChildren); void (*end_element)(PInfo pi, const char *ename); } *ParseCallbacks; typedef struct _CircArray { VALUE obj_array[1024]; VALUE *objs; unsigned long size; /* allocated size or initial array size */ unsigned long cnt; } *CircArray; typedef struct _Options { char encoding[64]; /* encoding, stored in the option to avoid GC invalidation in default values */ int indent; /* indention for dump, default 2 */ int trace; /* trace level */ char with_dtd; /* YesNo */ char with_xml; /* YesNo */ char with_instruct; /* YesNo */ char circular; /* YesNo */ char xsd_date; /* YesNo */ char mode; /* LoadMode */ char effort; /* Effort */ char sym_keys; /* symbolize keys */ #ifdef HAVE_RUBY_ENCODING_H rb_encoding *rb_enc; #elif HAS_PRIVATE_ENCODING VALUE rb_enc; #else void *rb_enc; #endif } *Options; /* parse information structure */ struct _PInfo { struct _Helper helpers[MAX_DEPTH]; Helper h; /* current helper or 0 if not set */ char *str; /* buffer being read from */ char *s; /* current position in buffer */ VALUE obj; ParseCallbacks pcb; CircArray circ_array; unsigned long id; /* set for text types when cirs_array is set */ Options options; }; extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options); extern void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line); extern void ox_sax_parse(VALUE handler, VALUE io, int convert); extern void ox_sax_define(void); extern char* ox_write_obj_to_str(VALUE obj, Options copts); extern void ox_write_obj_to_file(VALUE obj, const char *path, Options copts); extern struct _Options ox_default_options; extern VALUE Ox; extern ID ox_at_content_id; extern ID ox_at_id; extern ID ox_at_value_id; extern ID ox_attr_id; extern ID 
ox_attr_value_id; extern ID ox_attributes_id; extern ID ox_beg_id; extern ID ox_cdata_id; extern ID ox_comment_id; extern ID ox_den_id; extern ID ox_doctype_id; extern ID ox_end_element_id; extern ID ox_end_id; extern ID ox_end_instruct_id; extern ID ox_error_id; extern ID ox_excl_id; extern ID ox_external_encoding_id; extern ID ox_fileno_id; extern ID ox_force_encoding_id; extern ID ox_inspect_id; extern ID ox_instruct_id; extern ID ox_jd_id; extern ID ox_keys_id; extern ID ox_local_id; extern ID ox_mesg_id; extern ID ox_message_id; extern ID ox_nodes_id; extern ID ox_num_id; extern ID ox_parse_id; extern ID ox_read_id; extern ID ox_readpartial_id; extern ID ox_start_element_id; extern ID ox_string_id; extern ID ox_text_id; extern ID ox_to_c_id; extern ID ox_to_s_id; extern ID ox_to_sym_id; extern ID ox_tv_sec_id; extern ID ox_tv_nsec_id; extern ID ox_tv_usec_id; extern ID ox_value_id; #if HAS_ENCODING_SUPPORT extern rb_encoding *ox_utf8_encoding; #elif HAS_PRIVATE_ENCODING extern VALUE ox_utf8_encoding; #else extern void *ox_utf8_encoding; #endif extern VALUE ox_arg_error_class; extern VALUE ox_date_class; extern VALUE ox_empty_string; extern VALUE ox_encoding_sym; extern VALUE ox_parse_error_class; extern VALUE ox_stringio_class; extern VALUE ox_struct_class; extern VALUE ox_time_class; extern VALUE ox_zero_fixnum; extern VALUE ox_document_clas; extern VALUE ox_element_clas; extern VALUE ox_instruct_clas; extern VALUE ox_bag_clas; extern VALUE ox_comment_clas; extern VALUE ox_doctype_clas; extern VALUE ox_cdata_clas; extern Cache ox_symbol_cache; extern Cache ox_class_cache; extern Cache ox_attr_cache; #if defined(__cplusplus) #if 0 { /* satisfy cc-mode */ #endif } /* extern "C" { */ #endif #endif /* __OX_H__ */ ox-1.8.9/ext/ox/cache8_test.c0000644000004100000410000000507512111637206016001 0ustar www-datawww-data/* cache8_test.c * Copyright (c) 2011, Peter Ohler * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "cache8.h" static slot_t data[] = { 0x000000A0A0A0A0A0ULL, 0x0000000000ABCDEFULL, 0x0123456789ABCDEFULL, 0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000003ULL, 0x0000000000000004ULL, 0 }; void ox_cache8_test() { Cache8 c; slot_t v; slot_t *d; slot_t cnt = 1; slot_t *slot = 0; ox_cache8_new(&c); for (d = data; 0 != *d; d++) { v = ox_cache8_get(c, *d, &slot); if (0 == v) { if (0 == slot) { printf("*** failed to get a slot for 0x%016llx\n", (unsigned long long)*d); } else { printf("*** adding 0x%016llx to cache with value %llu\n", (unsigned long long)*d, (unsigned long long)cnt); *slot = cnt++; } } else { printf("*** get on 0x%016llx returned %llu\n", (unsigned long long)*d, (unsigned long long)v); } /*ox_cache8_print(c); */ } ox_cache8_print(c); } ox-1.8.9/ext/ox/sax.c0000644000004100000410000011010712111637206014373 0ustar www-datawww-data/* sax.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #if NEEDS_UIO #include #endif #include #include #include "ruby.h" #include "ox.h" typedef struct _SaxDrive { char base_buf[0x00010000]; char *buf; char *buf_end; char *cur; char *read_end; /* one past last character read */ char *str; /* start of current string being read */ int line; int col; VALUE handler; VALUE value_obj; int (*read_func)(struct _SaxDrive *dr); int convert_special; union { int fd; VALUE io; const char *in_str; }; int has_instruct; int has_end_instruct; int has_attr; int has_attr_value; int has_doctype; int has_comment; int has_cdata; int has_text; int has_value; int has_start_element; int has_end_element; int has_error; #if HAS_ENCODING_SUPPORT rb_encoding *encoding; #elif HAS_PRIVATE_ENCODING VALUE encoding; #endif } *SaxDrive; #ifdef NEEDS_STPCPY char *stpncpy(char *dest, const char *src, size_t n); #endif static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert); static void sax_drive_cleanup(SaxDrive dr); static int sax_drive_read(SaxDrive dr); static void sax_drive_error(SaxDrive dr, const char *msg, int critical); static int read_children(SaxDrive dr, int first); static int read_instruction(SaxDrive dr); static int read_doctype(SaxDrive dr); static int read_cdata(SaxDrive dr); static int read_comment(SaxDrive dr); static int read_element(SaxDrive dr); static int read_text(SaxDrive dr); static const char* read_attrs(SaxDrive dr, char c, char termc, char 
term2, int is_xml); static char read_name_token(SaxDrive dr); static int read_quoted_value(SaxDrive dr); static int collapse_special(char *str); static VALUE rescue_cb(VALUE rdr, VALUE err); static VALUE io_cb(VALUE rdr); static VALUE partial_io_cb(VALUE rdr); static int read_from_io(SaxDrive dr); #ifndef JRUBY_RUBY static int read_from_fd(SaxDrive dr); #endif static int read_from_io_partial(SaxDrive dr); static int read_from_str(SaxDrive dr); static VALUE sax_value_class; /* This is only for CentOS 5.4 with Ruby 1.9.3-p0 and for OS X 10.6 and Solaris 10. */ #ifdef NEEDS_STPCPY char *stpncpy(char *dest, const char *src, size_t n) { size_t cnt = strlen(src) + 1; if (n < cnt) { cnt = n; } strncpy(dest, src, cnt); return dest + cnt - 1; } #endif static inline char sax_drive_get(SaxDrive dr) { if (dr->read_end <= dr->cur) { if (0 != sax_drive_read(dr)) { return 0; } } if ('\n' == *dr->cur) { dr->line++; dr->col = 0; } dr->col++; return *dr->cur++; } /* Starts by reading a character so it is safe to use with an empty or * compacted buffer. */ inline static char next_non_white(SaxDrive dr) { char c; while ('\0' != (c = sax_drive_get(dr))) { switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': break; default: return c; } } return '\0'; } /* Starts by reading a character so it is safe to use with an empty or * compacted buffer. 
*/ inline static char next_white(SaxDrive dr) { char c; while ('\0' != (c = sax_drive_get(dr))) { switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': case '\0': return c; default: break; } } return '\0'; } inline static int is_white(char c) { switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': return 1; default: break; } return 0; } inline static VALUE str2sym(const char *str, SaxDrive dr, char **strp) { VALUE *slot; VALUE sym; if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) { #if HAS_ENCODING_SUPPORT if (0 != dr->encoding) { VALUE rstr = rb_str_new2(str); rb_enc_associate(rstr, dr->encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = ID2SYM(rb_intern(str)); } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding) { VALUE rstr = rb_str_new2(str); rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = ID2SYM(rb_intern(str)); } #else sym = ID2SYM(rb_intern(str)); #endif *slot = sym; } return sym; } void ox_sax_parse(VALUE handler, VALUE io, int convert) { struct _SaxDrive dr; sax_drive_init(&dr, handler, io, convert); #if 0 printf("*** sax_parse with these flags\n"); printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false"); printf(" has_end_instruct = %s\n", dr.has_end_instruct ? "true" : "false"); printf(" has_attr = %s\n", dr.has_attr ? "true" : "false"); printf(" has_attr_value = %s\n", dr.has_attr_value ? "true" : "false"); printf(" has_doctype = %s\n", dr.has_doctype ? "true" : "false"); printf(" has_comment = %s\n", dr.has_comment ? "true" : "false"); printf(" has_cdata = %s\n", dr.has_cdata ? "true" : "false"); printf(" has_text = %s\n", dr.has_text ? "true" : "false"); printf(" has_value = %s\n", dr.has_value ? "true" : "false"); printf(" has_start_element = %s\n", dr.has_start_element ? "true" : "false"); printf(" has_end_element = %s\n", dr.has_end_element ? "true" : "false"); printf(" has_error = %s\n", dr.has_error ? 
"true" : "false"); #endif read_children(&dr, 1); sax_drive_cleanup(&dr); } inline static int respond_to(VALUE obj, ID method) { #ifdef JRUBY_RUBY /* There is a bug in JRuby where rb_respond_to() returns true (1) even if * a method is private. */ { VALUE args[1]; *args = ID2SYM(method); return (Qtrue == rb_funcall2(obj, rb_intern("respond_to?"), 1, args)); } #else return rb_respond_to(obj, method); #endif } static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) { if (ox_stringio_class == rb_obj_class(io)) { VALUE s = rb_funcall2(io, ox_string_id, 0, 0); dr->read_func = read_from_str; dr->in_str = StringValuePtr(s); } else if (rb_respond_to(io, ox_readpartial_id)) { #ifdef JRUBY_RUBY dr->read_func = read_from_io_partial; dr->io = io; #else VALUE rfd; if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) { dr->read_func = read_from_fd; dr->fd = FIX2INT(rfd); } else { dr->read_func = read_from_io_partial; dr->io = io; } #endif } else if (rb_respond_to(io, ox_read_id)) { #ifdef JRUBY_RUBY dr->read_func = read_from_io; dr->io = io; #else VALUE rfd; if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) { dr->read_func = read_from_fd; dr->fd = FIX2INT(rfd); } else { dr->read_func = read_from_io; dr->io = io; } #endif } else { rb_raise(ox_arg_error_class, "sax_parser io argument must respond to readpartial() or read().\n"); } dr->buf = dr->base_buf; *dr->buf = '\0'; dr->buf_end = dr->buf + sizeof(dr->base_buf) - 1; /* 1 less to make debugging easier */ dr->cur = dr->buf; dr->read_end = dr->buf; dr->str = 0; dr->line = 1; dr->col = 0; dr->handler = handler; dr->value_obj = rb_data_object_alloc(sax_value_class, dr, 0, 0); rb_gc_register_address(&dr->value_obj); dr->convert_special = convert; dr->has_instruct = respond_to(handler, ox_instruct_id); dr->has_end_instruct = respond_to(handler, ox_end_instruct_id); dr->has_attr = respond_to(handler, ox_attr_id); dr->has_attr_value = 
respond_to(handler, ox_attr_value_id); dr->has_doctype = respond_to(handler, ox_doctype_id); dr->has_comment = respond_to(handler, ox_comment_id); dr->has_cdata = respond_to(handler, ox_cdata_id); dr->has_text = respond_to(handler, ox_text_id); dr->has_value = respond_to(handler, ox_value_id); dr->has_start_element = respond_to(handler, ox_start_element_id); dr->has_end_element = respond_to(handler, ox_end_element_id); dr->has_error = respond_to(handler, ox_error_id); #if HAS_ENCODING_SUPPORT if ('\0' == *ox_default_options.encoding) { VALUE encoding; if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) { dr->encoding = rb_enc_from_index(rb_enc_get_index(encoding)); } else { dr->encoding = 0; } } else { dr->encoding = rb_enc_find(ox_default_options.encoding); } #elif HAS_PRIVATE_ENCODING if ('\0' == *ox_default_options.encoding) { VALUE encoding; if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) { dr->encoding = encoding; } else { dr->encoding = Qnil; } } else { dr->encoding = rb_str_new2(ox_default_options.encoding); } #endif }

/* Releases what sax_drive_init() and sax_drive_read() acquired: the GC
 * registration of value_obj and, if the buffer was moved off base_buf, the
 * heap buffer.
 */
static void
sax_drive_cleanup(SaxDrive dr) {
    rb_gc_unregister_address(&dr->value_obj);
    if (dr->base_buf != dr->buf) {
        xfree(dr->buf);
    }
}

/* Makes room in the buffer and then asks read_func for more input.
 *
 * Bytes in front of the protected start (dr->str, or dr->cur when no start is
 * protected) are dropped by sliding the rest down to the start of the buffer.
 * If nothing can be dropped the buffer is doubled instead. A '\0' is written
 * at read_end after the read. Returns whatever read_func returned.
 *
 * buf_end always addresses the last byte of the buffer, as set up by
 * sax_drive_init(), so the terminator written at read_end stays inside the
 * buffer even when a read fills it completely.
 */
static int
sax_drive_read(SaxDrive dr) {
    int         err;
    size_t      shift = 0;

    if (dr->buf < dr->cur) {
        if (0 == dr->str) {
            shift = dr->cur - dr->buf;
        } else {
            shift = dr->str - dr->buf;
        }
        /*printf("\n*** shift: %lu\n", shift); */
        if (0 == shift) { /* no space left so allocate more */
            /* Offsets are taken before reallocating so that no arithmetic is
             * done on a pointer to memory that has already been released.
             */
            size_t      cap = (size_t)(dr->buf_end - dr->buf) + 1;
            size_t      cur_off = dr->cur - dr->buf;
            size_t      end_off = dr->read_end - dr->buf;
            int         has_str = (0 != dr->str);
            size_t      str_off = has_str ? (size_t)(dr->str - dr->buf) : 0;

            if (dr->buf == dr->base_buf) {
                dr->buf = ALLOC_N(char, cap * 2);
                memcpy(dr->buf, dr->base_buf, cap);
            } else {
                REALLOC_N(dr->buf, char, cap * 2);
            }
            dr->buf_end = dr->buf + cap * 2 - 1; /* last byte, not one past it */
            dr->cur = dr->buf + cur_off;
            dr->read_end = dr->buf + end_off;
            if (has_str) {
                dr->str = dr->buf + str_off;
            }
        } else {
            memmove(dr->buf, dr->buf + shift, dr->read_end - (dr->buf + shift));
            dr->cur -= shift;
            dr->read_end -= shift;
            if (0 != dr->str) {
                dr->str -= shift;
            }
        }
    }
    err = dr->read_func(dr);
    *dr->read_end = '\0';

    return err;
}

/* Reports a parse problem.
 *
 * If the handler responds to error it is called with the message, line, and
 * column and this function returns, whatever the value of critical. Without
 * such a handler a critical problem cleans up the drive and raises a parse
 * error that includes the position; a non critical one is ignored.
 */
static void
sax_drive_error(SaxDrive dr, const char *msg, int critical) {
    if (dr->has_error) {
        VALUE   args[3];

        args[0] = rb_str_new2(msg);
        args[1] = INT2FIX(dr->line);
        args[2] = INT2FIX(dr->col);
        rb_funcall2(dr->handler, ox_error_id, 3, args);
    } else if (critical) {
        sax_drive_cleanup(dr);
        rb_raise(ox_parse_error_class, "%s at line %d, column %d\n", msg, dr->line, dr->col);
    }
}
static int read_children(SaxDrive dr, int first) { int err = 0; int element_read = !first; char c; while (!err) { dr->str = dr->cur; /* protect the start */ c = sax_drive_get(dr); if (first) { if (0xEF == (uint8_t)c) { /* only UTF8 is supported */ if (0xBB == (uint8_t)sax_drive_get(dr) && 0xBF == (uint8_t)sax_drive_get(dr)) { #if HAS_ENCODING_SUPPORT dr->encoding = ox_utf8_encoding; #elif HAS_PRIVATE_ENCODING dr->encoding = ox_utf8_encoding; #endif c = sax_drive_get(dr); } else { sax_drive_error(dr, "invalid format, invalid BOM or a binary file.", 1); } } } if ('\0' == c || (is_white(c) && '\0' == (c = next_non_white(dr)))) { if (!first) { sax_drive_error(dr, "invalid format, element not terminated", 1); err = 1; } break; /* normal completion if first */ } if ('<' != c) { if (first) { /* all top level entities start with < */ sax_drive_error(dr, "invalid format, expected <", 1); break; /* unrecoverable */ } if (0 != (err = read_text(dr))) { /* finished when < is reached */ break; } } dr->str = dr->cur; /* protect the start for elements */ c = sax_drive_get(dr); switch (c) { case '?': /* instructions (xml or otherwise) */ err = read_instruction(dr); break; case '!': /* comment or doctype */ dr->str = dr->cur; c = sax_drive_get(dr); if ('\0' == c) { sax_drive_error(dr, "invalid format, DOCTYPE or comment not terminated", 1); err = 1; } else if ('-' == c) { c = sax_drive_get(dr); /* skip first - and get next
character */ if ('-' != c) { sax_drive_error(dr, "invalid format, bad comment format", 1); err = 1; } else { c = sax_drive_get(dr); /* skip second - */ err = read_comment(dr); } } else { int i; for (i = 7; 0 < i; i--) { sax_drive_get(dr); } if (0 == strncmp("DOCTYPE", dr->str, 7)) { if (element_read || !first) { sax_drive_error(dr, "invalid format, DOCTYPE can not come after an element", 0); } err = read_doctype(dr); } else if (0 == strncmp("[CDATA[", dr->str, 7)) { err = read_cdata(dr); } else { sax_drive_error(dr, "invalid format, DOCTYPE or comment expected", 1); err = 1; } } break; case '/': /* element end */ return ('\0' == read_name_token(dr)); break; case '\0': sax_drive_error(dr, "invalid format, document not terminated", 1); err = 1; break; default: dr->cur--; /* safe since no read occurred after getting last character */ if (first && element_read) { sax_drive_error(dr, "invalid format, multiple top level elements", 0); } err = read_element(dr); element_read = 1; break; } } return err; } static void read_content(SaxDrive dr, char *content, size_t len) { char c; char *end = content + len; while ('\0' != (c = sax_drive_get(dr))) { if (end < content) { sax_drive_error(dr, "processing instruction content too large", 1); } if ('?' 
== c) { if ('\0' == (c = sax_drive_get(dr))) { sax_drive_error(dr, "invalid format, document not terminated", 1); } if ('>' == c) { *content = '\0'; return; } else { *content++ = c; } } else { *content++ = c; } } *content = '\0'; } /* Entered after the "str)); if (dr->has_instruct || dr->has_end_instruct) { target = rb_str_new2(dr->str); } if (dr->has_instruct) { VALUE args[1]; args[0] = target; rb_funcall2(dr->handler, ox_instruct_id, 1, args); } dr->str = dr->cur; /* make sure the start doesn't get compacted out */ read_content(dr, content, sizeof(content) - 1); cend = dr->cur; dr->cur = dr->str; if (0 != (err = read_attrs(dr, c, '?', '?', is_xml))) { if (dr->has_text) { VALUE args[1]; if (dr->convert_special) { if (0 != collapse_special(content)) { sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0); } } args[0] = rb_str_new2(content); #if HAS_ENCODING_SUPPORT if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding) { rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding); } #endif rb_funcall2(dr->handler, ox_text_id, 1, args); } dr->cur = cend; } else { c = next_non_white(dr); if ('>' != c) { sax_drive_error(dr, "invalid format, instruction not terminated", 1); return -1; } } if (dr->has_end_instruct) { VALUE args[1]; args[0] = target; rb_funcall2(dr->handler, ox_end_instruct_id, 1, args); } dr->str = 0; return 0; } /* Entered after the "str = dr->cur - 1; /* mark the start */ while ('>' != (c = sax_drive_get(dr))) { if ('\0' == c) { sax_drive_error(dr, "invalid format, doctype terminated unexpectedly", 1); return -1; } } *(dr->cur - 1) = '\0'; if (dr->has_doctype) { VALUE args[1]; args[0] = rb_str_new2(dr->str); rb_funcall2(dr->handler, ox_doctype_id, 1, args); } dr->str = 0; return 0; } /* Entered after the "cur--; /* back up to the start in case the cdata is empty */ dr->str = dr->cur; /* mark the start */ while (1) { c = sax_drive_get(dr); if (']' == c) 
{ end++; } else if ('>' == c) { if (2 <= end) { *(dr->cur - 3) = '\0'; break; } end = 0; } else if ('\0' == c) { sax_drive_error(dr, "invalid format, cdata terminated unexpectedly", 1); return -1; } else { end = 0; } } if (dr->has_cdata) { VALUE args[1]; args[0] = rb_str_new2(dr->str); #if HAS_ENCODING_SUPPORT if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding) { rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding); } #endif rb_funcall2(dr->handler, ox_cdata_id, 1, args); } dr->str = 0; return 0; } /* Entered after the "