hpricot-0.8.6/0000755000175000017500000000000011710073440012550 5ustar boutilboutilhpricot-0.8.6/metadata.yml0000644000175000017500000000504611710073440015060 0ustar boutilboutil--- !ruby/object:Gem::Specification name: hpricot version: !ruby/object:Gem::Version hash: 51 prerelease: segments: - 0 - 8 - 6 version: 0.8.6 platform: ruby authors: - why the lucky stiff autorequire: bindir: bin cert_chain: [] date: 2012-01-17 00:00:00 Z dependencies: [] description: a swift, liberal HTML parser with a fantastic library email: why@ruby-lang.org executables: [] extensions: - ext/fast_xs/extconf.rb - ext/hpricot_scan/extconf.rb extra_rdoc_files: - README.md - CHANGELOG - COPYING files: - CHANGELOG - COPYING - README.md - Rakefile - test/files/basic.xhtml - test/files/bnqt.html - test/files/boingboing.html - test/files/cy0.html - test/files/immob.html - test/files/pace_application.html - test/files/tenderlove.html - test/files/uswebgen.html - test/files/utf8.html - test/files/week9.html - test/files/why.xml - test/load_files.rb - test/nokogiri-bench.rb - test/test_alter.rb - test/test_builder.rb - test/test_parser.rb - test/test_paths.rb - test/test_preserved.rb - test/test_xml.rb - extras/hpricot.png - lib/hpricot/blankslate.rb - lib/hpricot/builder.rb - lib/hpricot/elements.rb - lib/hpricot/htmlinfo.rb - lib/hpricot/inspect.rb - lib/hpricot/modules.rb - lib/hpricot/parse.rb - lib/hpricot/tag.rb - lib/hpricot/tags.rb - lib/hpricot/traverse.rb - lib/hpricot/xchar.rb - lib/hpricot.rb - ext/hpricot_scan/hpricot_scan.h - ext/fast_xs/FastXsService.java - ext/hpricot_scan/HpricotCss.java - ext/hpricot_scan/HpricotScanService.java - ext/fast_xs/fast_xs.c - ext/hpricot_scan/hpricot_css.c - ext/hpricot_scan/hpricot_scan.c - ext/fast_xs/extconf.rb - ext/hpricot_scan/extconf.rb - ext/hpricot_scan/hpricot_common.rl - ext/hpricot_scan/hpricot_css.java.rl - ext/hpricot_scan/hpricot_css.rl - ext/hpricot_scan/hpricot_scan.java.rl - ext/hpricot_scan/hpricot_scan.rl homepage: 
http://code.whytheluckystiff.net/hpricot/ licenses: [] post_install_message: rdoc_options: - --quiet - --title - The Hpricot Reference - --main - README.md - --inline-source require_paths: - lib required_ruby_version: !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" required_rubygems_version: !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" requirements: [] rubyforge_project: hobix rubygems_version: 1.8.6 signing_key: specification_version: 3 summary: a swift, liberal HTML parser with a fantastic library test_files: [] hpricot-0.8.6/ext/0000755000175000017500000000000011710073440013350 5ustar boutilboutilhpricot-0.8.6/ext/fast_xs/0000755000175000017500000000000011710073440015017 5ustar boutilboutilhpricot-0.8.6/ext/fast_xs/extconf.rb0000644000175000017500000000013711710073440017013 0ustar boutilboutilrequire 'mkmf' have_header('stdio.h') or exit dir_config('fast_xs') create_makefile('fast_xs') hpricot-0.8.6/ext/fast_xs/fast_xs.c0000644000175000017500000001267211710073440016642 0ustar boutilboutil#include #include #ifdef HAVE_RUBY_ENCODING_H #include # define ASSOCIATE_INDEX(s,enc) rb_enc_associate_index((s), rb_enc_to_index(enc)) #else # define ASSOCIATE_INDEX(s,enc) #endif #ifndef RARRAY_LEN #define RARRAY_LEN(arr) RARRAY(arr)->len #define RARRAY_PTR(arr) RARRAY(arr)->ptr #define RSTRING_LEN(str) RSTRING(str)->len #define RSTRING_PTR(str) RSTRING(str)->ptr #endif static ID unpack_id; static VALUE U_fmt, C_fmt; /* give GCC hints for better branch prediction * (we layout branches so that ASCII characters are handled faster) */ #if defined(__GNUC__) && (__GNUC__ >= 3) # define likely(x) __builtin_expect (!!(x), 1) # define unlikely(x) __builtin_expect (!!(x), 0) #else # define unlikely(x) (x) # define likely(x) (x) #endif /* pass-through certain characters for CP-1252 */ #define p(x) (x-128) static const int 
cp_1252[] = { 8364, /* 128 => 8364, euro sign */ p(129), /* 129 => 129, pass-through */ 8218, /* 130 => 8218, single low-9 quotation mark */ 402, /* 131 => 402, latin small letter f with hook */ 8222, /* 132 => 8222, double low-9 quotation mark */ 8230, /* 133 => 8230, horizontal ellipsis */ 8224, /* 134 => 8224, dagger */ 8225, /* 135 => 8225, double dagger */ 710, /* 136 => 710, modifier letter circumflex accent */ 8240, /* 137 => 8240, per mille sign */ 352, /* 138 => 352, latin capital letter s with caron */ 8249, /* 139 => 8249, single left-pointing angle quotation mark */ 338, /* 140 => 338, latin capital ligature oe */ p(141), /* 141 => 141, pass-through */ 381, /* 142 => 381, latin capital letter z with caron */ p(143), /* 143 => 143, pass-through */ p(144), /* 144 => 144, pass-through */ 8216, /* 145 => 8216, left single quotation mark */ 8217, /* 146 => 8217, right single quotation mark */ 8220, /* 147 => 8220, left double quotation mark */ 8221, /* 148 => 8221, right double quotation mark */ 8226, /* 149 => 8226, bullet */ 8211, /* 150 => 8211, en dash */ 8212, /* 151 => 8212, em dash */ 732, /* 152 => 732, small tilde */ 8482, /* 153 => 8482, trade mark sign */ 353, /* 154 => 353, latin small letter s with caron */ 8250, /* 155 => 8250, single right-pointing angle quotation mark */ 339, /* 156 => 339, latin small ligature oe */ p(157), /* 157 => 157, pass-through */ 382, /* 158 => 382, latin small letter z with caron */ 376 /* 159 => 376} latin capital letter y with diaeresis */ }; #define VALID_VALUE(n) \ (n >= 0x20 && n <= 0xD7FF) || \ (n >= 0xE000 && n <= 0xFFFD) || \ (n >= 0x10000 && n <= 0x10FFFF) #define CP_1252_ESCAPE(n) do { \ if (n >= 128 && n <= 159) \ n = cp_1252[n - 128]; \ } while(0) static inline size_t bytes_for(int n) { if (n < 1000) return sizeof("ϧ") - 1; if (n < 10000) return sizeof("✏") - 1; if (n < 100000) return sizeof("𘚟") - 1; if (n < 1000000) return sizeof("󴈿") - 1; /* if (n < 10000000), we won't have cases above 0x10FFFF */ 
return sizeof("�") - 1; } static size_t escape(char *buf, int n) { #define return_const_len(x) do { \ memcpy(buf, x, sizeof(x) - 1); \ return (sizeof(x) - 1); \ } while (0) /* handle ASCII first */ if (likely(n < 128)) { if (likely(n >= 0x20 || n == '\t' || n == '\n' || n == '\r')) { if (unlikely(n == '"')) return_const_len("""); if (unlikely(n == '&')) return_const_len("&"); if (unlikely(n == '<')) return_const_len("<"); if (unlikely(n == '>')) return_const_len(">"); buf[0] = (char)n; return 1; } buf[0] = '*'; return 1; } #undef return_const_len CP_1252_ESCAPE(n); if (VALID_VALUE(n)) { /* return snprintf(buf, sizeof("􏿿"), "&#%i;", n); */ static const char digitmap[] = "0123456789"; size_t rv = sizeof("&#;") - 1; buf += bytes_for(n); *--buf = ';'; do { *--buf = digitmap[(int)(n % 10)]; ++rv; } while (n /= 10); *--buf = '#'; *--buf = '&'; return rv; } buf[0] = '*'; return 1; } static VALUE unpack_utf8(VALUE self) { return rb_funcall(self, unpack_id, 1, U_fmt); } static VALUE unpack_uchar(VALUE self) { return rb_funcall(self, unpack_id, 1, C_fmt); } /* * escapes strings for XML * The double-quote (") character is translated to """ */ static VALUE fast_xs(VALUE self) { long i; VALUE array; char *c; size_t s_len; VALUE *tmp; VALUE rv; array = rb_rescue(unpack_utf8, self, unpack_uchar, self); for (tmp = RARRAY_PTR(array), s_len = i = RARRAY_LEN(array); --i >= 0; tmp++) { int n = NUM2INT(*tmp); if (likely(n < 128)) { if (unlikely(n == '"')) s_len += (sizeof(""") - 2); if (unlikely(n == '&')) s_len += (sizeof("&") - 2); if (unlikely(n == '>' || n == '<')) s_len += (sizeof(">") - 2); continue; } CP_1252_ESCAPE(n); if (VALID_VALUE(n)) s_len += bytes_for(n) - 1; } rv = rb_str_new(NULL, s_len); ASSOCIATE_INDEX(rv, rb_default_external_encoding()); c = RSTRING_PTR(rv); for (tmp = RARRAY_PTR(array), i = RARRAY_LEN(array); --i >= 0; tmp++) c += escape(c, NUM2INT(*tmp)); return rv; } void Init_fast_xs(void) { assert(cp_1252[159 - 128] == 376); /* just in case I skipped a line */ 
unpack_id = rb_intern("unpack"); U_fmt = rb_str_new("U*", 2); ASSOCIATE_INDEX(U_fmt, rb_ascii8bit_encoding()); C_fmt = rb_str_new("C*", 2); ASSOCIATE_INDEX(C_fmt, rb_ascii8bit_encoding()); rb_global_variable(&U_fmt); rb_global_variable(&C_fmt); rb_define_method(rb_cString, "fast_xs", fast_xs, 0); } hpricot-0.8.6/ext/fast_xs/FastXsService.java0000644000175000017500000012750711710073440020427 0ustar boutilboutil import java.io.IOException; import java.io.StringWriter; import java.io.Writer; import java.util.HashMap; import java.util.Map; import java.util.TreeMap; import org.jruby.Ruby; import org.jruby.RubyModule; import org.jruby.runtime.CallbackFactory; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.runtime.load.BasicLibraryService; public class FastXsService implements BasicLibraryService { public boolean basicLoad(final Ruby runtime) throws IOException { RubyModule string = runtime.getModule("String"); CallbackFactory fact = runtime.callbackFactory(FastXsService.class); string.defineMethod("fast_xs",fact.getFastSingletonMethod("fast_xs")); return true; } public static IRubyObject fast_xs(IRubyObject recv) { String string = recv.convertToString().getUnicodeValue(); StringWriter writer = new StringWriter ((int)(string.length() * 1.5)); try { Entities.FAST_XS.escape(writer, string); return recv.getRuntime().newString(writer.toString()); } catch (IOException e) { throw recv.getRuntime().newIOErrorFromException(e); } } } // From Apache commons-lang, // http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java?revision=560660&view=markup /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** *

* Provides HTML and XML entity utilities. *

* * @see ISO Entities * @see HTML 3.2 Character Entities for ISO Latin-1 * @see HTML 4.0 Character entity references * @see HTML 4.01 Character References * @see HTML 4.01 Code positions * * @author Alexander Day Chaffee * @author Gary Gregory * @since 2.0 * @version $Id$ */ class Entities { private static final String[][] BASIC_ARRAY = {{"quot", "34"}, // " - double-quote {"amp", "38"}, // & - ampersand {"lt", "60"}, // < - less-than {"gt", "62"}, // > - greater-than }; private static final String[][] APOS_ARRAY = {{"apos", "39"}, // XML apostrophe }; // package scoped for testing static final String[][] ISO8859_1_ARRAY = {{"nbsp", "160"}, // non-breaking space {"iexcl", "161"}, // inverted exclamation mark {"cent", "162"}, // cent sign {"pound", "163"}, // pound sign {"curren", "164"}, // currency sign {"yen", "165"}, // yen sign = yuan sign {"brvbar", "166"}, // broken bar = broken vertical bar {"sect", "167"}, // section sign {"uml", "168"}, // diaeresis = spacing diaeresis {"copy", "169"}, // © - copyright sign {"ordf", "170"}, // feminine ordinal indicator {"laquo", "171"}, // left-pointing double angle quotation mark = left pointing guillemet {"not", "172"}, // not sign {"shy", "173"}, // soft hyphen = discretionary hyphen {"reg", "174"}, // ® - registered trademark sign {"macr", "175"}, // macron = spacing macron = overline = APL overbar {"deg", "176"}, // degree sign {"plusmn", "177"}, // plus-minus sign = plus-or-minus sign {"sup2", "178"}, // superscript two = superscript digit two = squared {"sup3", "179"}, // superscript three = superscript digit three = cubed {"acute", "180"}, // acute accent = spacing acute {"micro", "181"}, // micro sign {"para", "182"}, // pilcrow sign = paragraph sign {"middot", "183"}, // middle dot = Georgian comma = Greek middle dot {"cedil", "184"}, // cedilla = spacing cedilla {"sup1", "185"}, // superscript one = superscript digit one {"ordm", "186"}, // masculine ordinal indicator {"raquo", "187"}, // right-pointing double 
angle quotation mark = right pointing guillemet {"frac14", "188"}, // vulgar fraction one quarter = fraction one quarter {"frac12", "189"}, // vulgar fraction one half = fraction one half {"frac34", "190"}, // vulgar fraction three quarters = fraction three quarters {"iquest", "191"}, // inverted question mark = turned question mark {"Agrave", "192"}, // À - uppercase A, grave accent {"Aacute", "193"}, // Á - uppercase A, acute accent {"Acirc", "194"}, // Â - uppercase A, circumflex accent {"Atilde", "195"}, // Ã - uppercase A, tilde {"Auml", "196"}, // Ä - uppercase A, umlaut {"Aring", "197"}, // Å - uppercase A, ring {"AElig", "198"}, // Æ - uppercase AE {"Ccedil", "199"}, // Ç - uppercase C, cedilla {"Egrave", "200"}, // È - uppercase E, grave accent {"Eacute", "201"}, // É - uppercase E, acute accent {"Ecirc", "202"}, // Ê - uppercase E, circumflex accent {"Euml", "203"}, // Ë - uppercase E, umlaut {"Igrave", "204"}, // Ì - uppercase I, grave accent {"Iacute", "205"}, // Í - uppercase I, acute accent {"Icirc", "206"}, // Î - uppercase I, circumflex accent {"Iuml", "207"}, // Ï - uppercase I, umlaut {"ETH", "208"}, // Ð - uppercase Eth, Icelandic {"Ntilde", "209"}, // Ñ - uppercase N, tilde {"Ograve", "210"}, // Ò - uppercase O, grave accent {"Oacute", "211"}, // Ó - uppercase O, acute accent {"Ocirc", "212"}, // Ô - uppercase O, circumflex accent {"Otilde", "213"}, // Õ - uppercase O, tilde {"Ouml", "214"}, // Ö - uppercase O, umlaut {"times", "215"}, // multiplication sign {"Oslash", "216"}, // Ø - uppercase O, slash {"Ugrave", "217"}, // Ù - uppercase U, grave accent {"Uacute", "218"}, // Ú - uppercase U, acute accent {"Ucirc", "219"}, // Û - uppercase U, circumflex accent {"Uuml", "220"}, // Ü - uppercase U, umlaut {"Yacute", "221"}, // Ý - uppercase Y, acute accent {"THORN", "222"}, // Þ - uppercase THORN, Icelandic {"szlig", "223"}, // ß - lowercase sharps, German {"agrave", "224"}, // à - lowercase a, grave accent {"aacute", "225"}, // á - lowercase a, 
acute accent {"acirc", "226"}, // â - lowercase a, circumflex accent {"atilde", "227"}, // ã - lowercase a, tilde {"auml", "228"}, // ä - lowercase a, umlaut {"aring", "229"}, // å - lowercase a, ring {"aelig", "230"}, // æ - lowercase ae {"ccedil", "231"}, // ç - lowercase c, cedilla {"egrave", "232"}, // è - lowercase e, grave accent {"eacute", "233"}, // é - lowercase e, acute accent {"ecirc", "234"}, // ê - lowercase e, circumflex accent {"euml", "235"}, // ë - lowercase e, umlaut {"igrave", "236"}, // ì - lowercase i, grave accent {"iacute", "237"}, // í - lowercase i, acute accent {"icirc", "238"}, // î - lowercase i, circumflex accent {"iuml", "239"}, // ï - lowercase i, umlaut {"eth", "240"}, // ð - lowercase eth, Icelandic {"ntilde", "241"}, // ñ - lowercase n, tilde {"ograve", "242"}, // ò - lowercase o, grave accent {"oacute", "243"}, // ó - lowercase o, acute accent {"ocirc", "244"}, // ô - lowercase o, circumflex accent {"otilde", "245"}, // õ - lowercase o, tilde {"ouml", "246"}, // ö - lowercase o, umlaut {"divide", "247"}, // division sign {"oslash", "248"}, // ø - lowercase o, slash {"ugrave", "249"}, // ù - lowercase u, grave accent {"uacute", "250"}, // ú - lowercase u, acute accent {"ucirc", "251"}, // û - lowercase u, circumflex accent {"uuml", "252"}, // ü - lowercase u, umlaut {"yacute", "253"}, // ý - lowercase y, acute accent {"thorn", "254"}, // þ - lowercase thorn, Icelandic {"yuml", "255"}, // ÿ - lowercase y, umlaut }; // http://www.w3.org/TR/REC-html40/sgml/entities.html // package scoped for testing static final String[][] HTML40_ARRAY = { // {"fnof", "402"}, // latin small f with hook = function= florin, U+0192 ISOtech --> // {"Alpha", "913"}, // greek capital letter alpha, U+0391 --> {"Beta", "914"}, // greek capital letter beta, U+0392 --> {"Gamma", "915"}, // greek capital letter gamma,U+0393 ISOgrk3 --> {"Delta", "916"}, // greek capital letter delta,U+0394 ISOgrk3 --> {"Epsilon", "917"}, // greek capital letter epsilon, U+0395 
--> {"Zeta", "918"}, // greek capital letter zeta, U+0396 --> {"Eta", "919"}, // greek capital letter eta, U+0397 --> {"Theta", "920"}, // greek capital letter theta,U+0398 ISOgrk3 --> {"Iota", "921"}, // greek capital letter iota, U+0399 --> {"Kappa", "922"}, // greek capital letter kappa, U+039A --> {"Lambda", "923"}, // greek capital letter lambda,U+039B ISOgrk3 --> {"Mu", "924"}, // greek capital letter mu, U+039C --> {"Nu", "925"}, // greek capital letter nu, U+039D --> {"Xi", "926"}, // greek capital letter xi, U+039E ISOgrk3 --> {"Omicron", "927"}, // greek capital letter omicron, U+039F --> {"Pi", "928"}, // greek capital letter pi, U+03A0 ISOgrk3 --> {"Rho", "929"}, // greek capital letter rho, U+03A1 --> // {"Sigma", "931"}, // greek capital letter sigma,U+03A3 ISOgrk3 --> {"Tau", "932"}, // greek capital letter tau, U+03A4 --> {"Upsilon", "933"}, // greek capital letter upsilon,U+03A5 ISOgrk3 --> {"Phi", "934"}, // greek capital letter phi,U+03A6 ISOgrk3 --> {"Chi", "935"}, // greek capital letter chi, U+03A7 --> {"Psi", "936"}, // greek capital letter psi,U+03A8 ISOgrk3 --> {"Omega", "937"}, // greek capital letter omega,U+03A9 ISOgrk3 --> {"alpha", "945"}, // greek small letter alpha,U+03B1 ISOgrk3 --> {"beta", "946"}, // greek small letter beta, U+03B2 ISOgrk3 --> {"gamma", "947"}, // greek small letter gamma,U+03B3 ISOgrk3 --> {"delta", "948"}, // greek small letter delta,U+03B4 ISOgrk3 --> {"epsilon", "949"}, // greek small letter epsilon,U+03B5 ISOgrk3 --> {"zeta", "950"}, // greek small letter zeta, U+03B6 ISOgrk3 --> {"eta", "951"}, // greek small letter eta, U+03B7 ISOgrk3 --> {"theta", "952"}, // greek small letter theta,U+03B8 ISOgrk3 --> {"iota", "953"}, // greek small letter iota, U+03B9 ISOgrk3 --> {"kappa", "954"}, // greek small letter kappa,U+03BA ISOgrk3 --> {"lambda", "955"}, // greek small letter lambda,U+03BB ISOgrk3 --> {"mu", "956"}, // greek small letter mu, U+03BC ISOgrk3 --> {"nu", "957"}, // greek small letter nu, U+03BD 
ISOgrk3 --> {"xi", "958"}, // greek small letter xi, U+03BE ISOgrk3 --> {"omicron", "959"}, // greek small letter omicron, U+03BF NEW --> {"pi", "960"}, // greek small letter pi, U+03C0 ISOgrk3 --> {"rho", "961"}, // greek small letter rho, U+03C1 ISOgrk3 --> {"sigmaf", "962"}, // greek small letter final sigma,U+03C2 ISOgrk3 --> {"sigma", "963"}, // greek small letter sigma,U+03C3 ISOgrk3 --> {"tau", "964"}, // greek small letter tau, U+03C4 ISOgrk3 --> {"upsilon", "965"}, // greek small letter upsilon,U+03C5 ISOgrk3 --> {"phi", "966"}, // greek small letter phi, U+03C6 ISOgrk3 --> {"chi", "967"}, // greek small letter chi, U+03C7 ISOgrk3 --> {"psi", "968"}, // greek small letter psi, U+03C8 ISOgrk3 --> {"omega", "969"}, // greek small letter omega,U+03C9 ISOgrk3 --> {"thetasym", "977"}, // greek small letter theta symbol,U+03D1 NEW --> {"upsih", "978"}, // greek upsilon with hook symbol,U+03D2 NEW --> {"piv", "982"}, // greek pi symbol, U+03D6 ISOgrk3 --> // {"bull", "8226"}, // bullet = black small circle,U+2022 ISOpub --> // {"hellip", "8230"}, // horizontal ellipsis = three dot leader,U+2026 ISOpub --> {"prime", "8242"}, // prime = minutes = feet, U+2032 ISOtech --> {"Prime", "8243"}, // double prime = seconds = inches,U+2033 ISOtech --> {"oline", "8254"}, // overline = spacing overscore,U+203E NEW --> {"frasl", "8260"}, // fraction slash, U+2044 NEW --> // {"weierp", "8472"}, // script capital P = power set= Weierstrass p, U+2118 ISOamso --> {"image", "8465"}, // blackletter capital I = imaginary part,U+2111 ISOamso --> {"real", "8476"}, // blackletter capital R = real part symbol,U+211C ISOamso --> {"trade", "8482"}, // trade mark sign, U+2122 ISOnum --> {"alefsym", "8501"}, // alef symbol = first transfinite cardinal,U+2135 NEW --> // // {"larr", "8592"}, // leftwards arrow, U+2190 ISOnum --> {"uarr", "8593"}, // upwards arrow, U+2191 ISOnum--> {"rarr", "8594"}, // rightwards arrow, U+2192 ISOnum --> {"darr", "8595"}, // downwards arrow, U+2193 ISOnum --> 
{"harr", "8596"}, // left right arrow, U+2194 ISOamsa --> {"crarr", "8629"}, // downwards arrow with corner leftwards= carriage return, U+21B5 NEW --> {"lArr", "8656"}, // leftwards double arrow, U+21D0 ISOtech --> // {"uArr", "8657"}, // upwards double arrow, U+21D1 ISOamsa --> {"rArr", "8658"}, // rightwards double arrow,U+21D2 ISOtech --> // {"dArr", "8659"}, // downwards double arrow, U+21D3 ISOamsa --> {"hArr", "8660"}, // left right double arrow,U+21D4 ISOamsa --> // {"forall", "8704"}, // for all, U+2200 ISOtech --> {"part", "8706"}, // partial differential, U+2202 ISOtech --> {"exist", "8707"}, // there exists, U+2203 ISOtech --> {"empty", "8709"}, // empty set = null set = diameter,U+2205 ISOamso --> {"nabla", "8711"}, // nabla = backward difference,U+2207 ISOtech --> {"isin", "8712"}, // element of, U+2208 ISOtech --> {"notin", "8713"}, // not an element of, U+2209 ISOtech --> {"ni", "8715"}, // contains as member, U+220B ISOtech --> // {"prod", "8719"}, // n-ary product = product sign,U+220F ISOamsb --> // {"sum", "8721"}, // n-ary summation, U+2211 ISOamsb --> // {"minus", "8722"}, // minus sign, U+2212 ISOtech --> {"lowast", "8727"}, // asterisk operator, U+2217 ISOtech --> {"radic", "8730"}, // square root = radical sign,U+221A ISOtech --> {"prop", "8733"}, // proportional to, U+221D ISOtech --> {"infin", "8734"}, // infinity, U+221E ISOtech --> {"ang", "8736"}, // angle, U+2220 ISOamso --> {"and", "8743"}, // logical and = wedge, U+2227 ISOtech --> {"or", "8744"}, // logical or = vee, U+2228 ISOtech --> {"cap", "8745"}, // intersection = cap, U+2229 ISOtech --> {"cup", "8746"}, // union = cup, U+222A ISOtech --> {"int", "8747"}, // integral, U+222B ISOtech --> {"there4", "8756"}, // therefore, U+2234 ISOtech --> {"sim", "8764"}, // tilde operator = varies with = similar to,U+223C ISOtech --> // {"cong", "8773"}, // approximately equal to, U+2245 ISOtech --> {"asymp", "8776"}, // almost equal to = asymptotic to,U+2248 ISOamsr --> {"ne", "8800"}, // 
not equal to, U+2260 ISOtech --> {"equiv", "8801"}, // identical to, U+2261 ISOtech --> {"le", "8804"}, // less-than or equal to, U+2264 ISOtech --> {"ge", "8805"}, // greater-than or equal to,U+2265 ISOtech --> {"sub", "8834"}, // subset of, U+2282 ISOtech --> {"sup", "8835"}, // superset of, U+2283 ISOtech --> // {"sube", "8838"}, // subset of or equal to, U+2286 ISOtech --> {"supe", "8839"}, // superset of or equal to,U+2287 ISOtech --> {"oplus", "8853"}, // circled plus = direct sum,U+2295 ISOamsb --> {"otimes", "8855"}, // circled times = vector product,U+2297 ISOamsb --> {"perp", "8869"}, // up tack = orthogonal to = perpendicular,U+22A5 ISOtech --> {"sdot", "8901"}, // dot operator, U+22C5 ISOamsb --> // // {"lceil", "8968"}, // left ceiling = apl upstile,U+2308 ISOamsc --> {"rceil", "8969"}, // right ceiling, U+2309 ISOamsc --> {"lfloor", "8970"}, // left floor = apl downstile,U+230A ISOamsc --> {"rfloor", "8971"}, // right floor, U+230B ISOamsc --> {"lang", "9001"}, // left-pointing angle bracket = bra,U+2329 ISOtech --> // {"rang", "9002"}, // right-pointing angle bracket = ket,U+232A ISOtech --> // // {"loz", "9674"}, // lozenge, U+25CA ISOpub --> // {"spades", "9824"}, // black spade suit, U+2660 ISOpub --> // {"clubs", "9827"}, // black club suit = shamrock,U+2663 ISOpub --> {"hearts", "9829"}, // black heart suit = valentine,U+2665 ISOpub --> {"diams", "9830"}, // black diamond suit, U+2666 ISOpub --> // {"OElig", "338"}, // -- latin capital ligature OE,U+0152 ISOlat2 --> {"oelig", "339"}, // -- latin small ligature oe, U+0153 ISOlat2 --> // {"Scaron", "352"}, // -- latin capital letter S with caron,U+0160 ISOlat2 --> {"scaron", "353"}, // -- latin small letter s with caron,U+0161 ISOlat2 --> {"Yuml", "376"}, // -- latin capital letter Y with diaeresis,U+0178 ISOlat2 --> // {"circ", "710"}, // -- modifier letter circumflex accent,U+02C6 ISOpub --> {"tilde", "732"}, // small tilde, U+02DC ISOdia --> // {"ensp", "8194"}, // en space, U+2002 ISOpub --> 
{"emsp", "8195"}, // em space, U+2003 ISOpub --> {"thinsp", "8201"}, // thin space, U+2009 ISOpub --> {"zwnj", "8204"}, // zero width non-joiner,U+200C NEW RFC 2070 --> {"zwj", "8205"}, // zero width joiner, U+200D NEW RFC 2070 --> {"lrm", "8206"}, // left-to-right mark, U+200E NEW RFC 2070 --> {"rlm", "8207"}, // right-to-left mark, U+200F NEW RFC 2070 --> {"ndash", "8211"}, // en dash, U+2013 ISOpub --> {"mdash", "8212"}, // em dash, U+2014 ISOpub --> {"lsquo", "8216"}, // left single quotation mark,U+2018 ISOnum --> {"rsquo", "8217"}, // right single quotation mark,U+2019 ISOnum --> {"sbquo", "8218"}, // single low-9 quotation mark, U+201A NEW --> {"ldquo", "8220"}, // left double quotation mark,U+201C ISOnum --> {"rdquo", "8221"}, // right double quotation mark,U+201D ISOnum --> {"bdquo", "8222"}, // double low-9 quotation mark, U+201E NEW --> {"dagger", "8224"}, // dagger, U+2020 ISOpub --> {"Dagger", "8225"}, // double dagger, U+2021 ISOpub --> {"permil", "8240"}, // per mille sign, U+2030 ISOtech --> {"lsaquo", "8249"}, // single left-pointing angle quotation mark,U+2039 ISO proposed --> // {"rsaquo", "8250"}, // single right-pointing angle quotation mark,U+203A ISO proposed --> // {"euro", "8364"}, // -- euro sign, U+20AC NEW --> }; /** *

* The set of entities supported by standard XML. *

*/ public static final Entities XML; /** *

* The set of entities supported by HTML 3.2. *

*/ public static final Entities HTML32; /** *

* The set of entities supported by HTML 4.0. *

*/ public static final Entities HTML40; /** *

* The set of entities supported by the Ruby fast_xs extension. *

*/ public static final Entities FAST_XS; static { XML = new Entities(); XML.addEntities(BASIC_ARRAY); XML.addEntities(APOS_ARRAY); } static { HTML32 = new Entities(); HTML32.addEntities(BASIC_ARRAY); HTML32.addEntities(ISO8859_1_ARRAY); } static { HTML40 = new Entities(); fillWithHtml40Entities(HTML40); } static { FAST_XS = new Entities(); FAST_XS.addEntities(BASIC_ARRAY); } /** *

* Fills the specified entities instance with HTML 40 entities. *

* * @param entities * the instance to be filled. */ static void fillWithHtml40Entities(Entities entities) { entities.addEntities(BASIC_ARRAY); entities.addEntities(ISO8859_1_ARRAY); entities.addEntities(HTML40_ARRAY); } static interface EntityMap { /** *

* Add an entry to this entity map. *

* * @param name * the entity name * @param value * the entity value */ void add(String name, int value); /** *

* Returns the name of the entity identified by the specified value. *

* * @param value * the value to locate * @return entity name associated with the specified value */ String name(int value); /** *

* Returns the value of the entity identified by the specified name. *

* * @param name * the name to locate * @return entity value associated with the specified name */ int value(String name); } // Very limited IntHashMap - it only supports get() and put() private static class IntHashMap { private transient Entry table[]; private transient int count; private int threshold; private final float loadFactor; private static class Entry { final int hash; final int key; Object value; Entry next; protected Entry(int hash, int key, Object value, Entry next) { this.hash = hash; this.key = key; this.value = value; this.next = next; } } public IntHashMap() { this.loadFactor = 0.75f; table = new Entry[20]; threshold = (int) (20 * 0.75f); } public Object get(int key) { Entry tab[] = table; int hash = key; int index = (hash & 0x7FFFFFFF) % tab.length; for (Entry e = tab[index]; e != null; e = e.next) { if (e.hash == hash) { return e.value; } } return null; } protected void rehash() { int oldCapacity = table.length; Entry oldMap[] = table; int newCapacity = oldCapacity * 2 + 1; Entry newMap[] = new Entry[newCapacity]; threshold = (int) (newCapacity * loadFactor); table = newMap; for (int i = oldCapacity; i-- > 0;) { for (Entry old = oldMap[i]; old != null;) { Entry e = old; old = old.next; int index = (e.hash & 0x7FFFFFFF) % newCapacity; e.next = newMap[index]; newMap[index] = e; } } } public Object put(int key, Object value) { // Makes sure the key is not already in the hashtable. Entry tab[] = table; int hash = key; int index = (hash & 0x7FFFFFFF) % tab.length; for (Entry e = tab[index]; e != null; e = e.next) { if (e.hash == hash) { Object old = e.value; e.value = value; return old; } } if (count >= threshold) { // Rehash the table if the threshold is exceeded rehash(); tab = table; index = (hash & 0x7FFFFFFF) % tab.length; } // Creates the new entry. 
Entry e = new Entry(hash, key, value, tab[index]); tab[index] = e; count++; return null; } } static class PrimitiveEntityMap implements EntityMap { private Map mapNameToValue = new HashMap(); private IntHashMap mapValueToName = new IntHashMap(); /** * {@inheritDoc} */ public void add(String name, int value) { mapNameToValue.put(name, new Integer(value)); mapValueToName.put(value, name); } /** * {@inheritDoc} */ public String name(int value) { return (String) mapValueToName.get(value); } /** * {@inheritDoc} */ public int value(String name) { Object value = mapNameToValue.get(name); if (value == null) { return -1; } return ((Integer) value).intValue(); } } static abstract class MapIntMap implements Entities.EntityMap { protected Map mapNameToValue; protected Map mapValueToName; /** * {@inheritDoc} */ public void add(String name, int value) { mapNameToValue.put(name, new Integer(value)); mapValueToName.put(new Integer(value), name); } /** * {@inheritDoc} */ public String name(int value) { return (String) mapValueToName.get(new Integer(value)); } /** * {@inheritDoc} */ public int value(String name) { Object value = mapNameToValue.get(name); if (value == null) { return -1; } return ((Integer) value).intValue(); } } static class HashEntityMap extends MapIntMap { /** * Constructs a new instance of HashEntityMap. */ public HashEntityMap() { mapNameToValue = new HashMap(); mapValueToName = new HashMap(); } } static class TreeEntityMap extends MapIntMap { /** * Constructs a new instance of TreeEntityMap. */ public TreeEntityMap() { mapNameToValue = new TreeMap(); mapValueToName = new TreeMap(); } } static class LookupEntityMap extends PrimitiveEntityMap { private String[] lookupTable; private int LOOKUP_TABLE_SIZE = 256; /** * {@inheritDoc} */ public String name(int value) { if (value < LOOKUP_TABLE_SIZE) { return lookupTable()[value]; } return super.name(value); } /** *

* Returns the lookup table for this entity map. The lookup table is created if it has not been previously. *

* * @return the lookup table */ private String[] lookupTable() { if (lookupTable == null) { createLookupTable(); } return lookupTable; } /** *

* Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity names. *

*/ private void createLookupTable() { lookupTable = new String[LOOKUP_TABLE_SIZE]; for (int i = 0; i < LOOKUP_TABLE_SIZE; ++i) { lookupTable[i] = super.name(i); } } } static class ArrayEntityMap implements EntityMap { protected int growBy = 100; protected int size = 0; protected String[] names; protected int[] values; /** * Constructs a new instance of ArrayEntityMap. */ public ArrayEntityMap() { names = new String[growBy]; values = new int[growBy]; } /** * Constructs a new instance of ArrayEntityMap specifying the size by which the array should * grow. * * @param growBy * array will be initialized to and will grow by this amount */ public ArrayEntityMap(int growBy) { this.growBy = growBy; names = new String[growBy]; values = new int[growBy]; } /** * {@inheritDoc} */ public void add(String name, int value) { ensureCapacity(size + 1); names[size] = name; values[size] = value; size++; } /** * Verifies the capacity of the entity array, adjusting the size if necessary. * * @param capacity * size the array should be */ protected void ensureCapacity(int capacity) { if (capacity > names.length) { int newSize = Math.max(capacity, size + growBy); String[] newNames = new String[newSize]; System.arraycopy(names, 0, newNames, 0, size); names = newNames; int[] newValues = new int[newSize]; System.arraycopy(values, 0, newValues, 0, size); values = newValues; } } /** * {@inheritDoc} */ public String name(int value) { for (int i = 0; i < size; ++i) { if (values[i] == value) { return names[i]; } } return null; } /** * {@inheritDoc} */ public int value(String name) { for (int i = 0; i < size; ++i) { if (names[i].equals(name)) { return values[i]; } } return -1; } } static class BinaryEntityMap extends ArrayEntityMap { /** * Constructs a new instance of BinaryEntityMap. */ public BinaryEntityMap() { super(); } /** * Constructs a new instance of ArrayEntityMap specifying the size by which the underlying array * should grow. 
* * @param growBy * array will be initialized to and will grow by this amount */ public BinaryEntityMap(int growBy) { super(growBy); } /** * Performs a binary search of the entity array for the specified key. This method is based on code in * {@link java.util.Arrays}. * * @param key * the key to be found * @return the index of the entity array matching the specified key */ private int binarySearch(int key) { int low = 0; int high = size - 1; while (low <= high) { int mid = (low + high) >> 1; int midVal = values[mid]; if (midVal < key) { low = mid + 1; } else if (midVal > key) { high = mid - 1; } else { return mid; // key found } } return -(low + 1); // key not found. } /** * {@inheritDoc} */ public void add(String name, int value) { ensureCapacity(size + 1); int insertAt = binarySearch(value); if (insertAt > 0) { return; // note: this means you can't insert the same value twice } insertAt = -(insertAt + 1); // binarySearch returns it negative and off-by-one System.arraycopy(values, insertAt, values, insertAt + 1, size - insertAt); values[insertAt] = value; System.arraycopy(names, insertAt, names, insertAt + 1, size - insertAt); names[insertAt] = name; size++; } /** * {@inheritDoc} */ public String name(int value) { int index = binarySearch(value); if (index < 0) { return null; } return names[index]; } } // package scoped for testing EntityMap map = new Entities.LookupEntityMap(); /** *

* Adds entities to this entity set. *

*
     * @param entityArray
     *            array of entities to be added; each row is read as {name, value}, with the value
     *            parsed as a decimal integer
     */
    public void addEntities(String[][] entityArray) {
        final int total = entityArray.length;
        int row = 0;
        while (row < total) {
            String[] pair = entityArray[row++];
            String entity = pair[0];
            int codePoint = Integer.parseInt(pair[1]);
            addEntity(entity, codePoint);
        }
    }

    /**
     *

* Adds an entity to this entity set. *

*
     * @param name
     *            name of the entity
     * @param value
     *            value of the entity
     */
    public void addEntity(String name, int value) {
        // registration is delegated entirely to the backing map
        this.map.add(name, value);
    }

    /**
     *

* Returns the name of the entity identified by the specified value. *

*
     * @param value
     *            the value to locate
     * @return entity name associated with the specified value, as reported by the backing map
     */
    public String entityName(int value) {
        String found = this.map.name(value);
        return found;
    }

    /**
     *

* Returns the value of the entity identified by the specified name. *

*
     * @param name
     *            the name to locate
     * @return entity value associated with the specified name, as reported by the backing map
     */
    public int entityValue(String name) {
        int found = this.map.value(name);
        return found;
    }

    /**
     *

* Escapes the characters in a String. *

* *

* For example, if you have called addEntity("foo", 0xA1), escape("\u00A1") will return * "&foo;" *

* * @param str * The String to escape. * @return A new escaped String. */ public String escape(String str) { StringWriter stringWriter = createStringWriter(str); try { this.escape(stringWriter, str); } catch (IOException e) { // This should never happen because ALL the StringWriter methods called by #escape(Writer, String) do not // throw IOExceptions. throw new RuntimeException(e); } return stringWriter.toString(); } /** *

* Escapes the characters in the String passed and writes the result to the Writer * passed. *

* * @param writer * The Writer to write the results of the escaping to. Assumed to be a non-null value. * @param str * The String to escape. Assumed to be a non-null value. * @throws IOException * when Writer passed throws the exception from calls to the {@link Writer#write(int)} * methods. * * @see #escape(String) * @see Writer */ public void escape(Writer writer, String str) throws IOException { int len = str.length(); for (int i = 0; i < len; i++) { char c = str.charAt(i); String entityName = this.entityName(c); if (entityName == null) { if (c > 0x7F) { writer.write("&#"); writer.write(Integer.toString(c, 10)); writer.write(';'); } else { writer.write(c); } } else { writer.write('&'); writer.write(entityName); writer.write(';'); } } } /** *

* Unescapes the entities in a String. *

* *

* For example, if you have called addEntity("foo", 0xA1), unescape("&foo;") will return * "\u00A1" *

* * @param str * The String to escape. * @return A new escaped String. */ public String unescape(String str) { int firstAmp = str.indexOf('&'); if (firstAmp < 0) { return str; } else { StringWriter stringWriter = createStringWriter(str); try { this.doUnescape(stringWriter, str, firstAmp); } catch (IOException e) { // This should never happen because ALL the StringWriter methods called by #escape(Writer, String) // do not throw IOExceptions. throw new RuntimeException(e); } return stringWriter.toString(); } } /** * Make the StringWriter 10% larger than the source String to avoid growing the writer * * @param str The source string * @return A newly created StringWriter */ private StringWriter createStringWriter(String str) { return new StringWriter((int) (str.length() + (str.length() * 0.1))); } /** *

* Unescapes the escaped entities in the String passed and writes the result to the * Writer passed. *

* * @param writer * The Writer to write the results to; assumed to be non-null. * @param str * The source String to unescape; assumed to be non-null. * @throws IOException * when Writer passed throws the exception from calls to the {@link Writer#write(int)} * methods. * * @see #escape(String) * @see Writer */ public void unescape(Writer writer, String str) throws IOException { int firstAmp = str.indexOf('&'); if (firstAmp < 0) { writer.write(str); return; } else { doUnescape(writer, str, firstAmp); } } /** * Underlying unescape method that allows the optimisation of not starting from the 0 index again. * * @param writer * The Writer to write the results to; assumed to be non-null. * @param str * The source String to unescape; assumed to be non-null. * @param firstAmp * The int index of the first ampersand in the source String. * @throws IOException * when Writer passed throws the exception from calls to the {@link Writer#write(int)} * methods. */ private void doUnescape(Writer writer, String str, int firstAmp) throws IOException { writer.write(str, 0, firstAmp); int len = str.length(); for (int i = firstAmp; i < len; i++) { char c = str.charAt(i); if (c == '&') { int nextIdx = i + 1; int semiColonIdx = str.indexOf(';', nextIdx); if (semiColonIdx == -1) { writer.write(c); continue; } int amphersandIdx = str.indexOf('&', i + 1); if (amphersandIdx != -1 && amphersandIdx < semiColonIdx) { // Then the text looks like &...&...; writer.write(c); continue; } String entityContent = str.substring(nextIdx, semiColonIdx); int entityValue = -1; int entityContentLen = entityContent.length(); if (entityContentLen > 0) { if (entityContent.charAt(0) == '#') { // escaped value content is an integer (decimal or // hexidecimal) if (entityContentLen > 1) { char isHexChar = entityContent.charAt(1); try { switch (isHexChar) { case 'X' : case 'x' : { entityValue = Integer.parseInt(entityContent.substring(2), 16); break; } default : { entityValue = 
Integer.parseInt(entityContent.substring(1), 10); } } if (entityValue > 0xFFFF) { entityValue = -1; } } catch (NumberFormatException e) { entityValue = -1; } } } else { // escaped value content is an entity name entityValue = this.entityValue(entityContent); } } if (entityValue == -1) { writer.write('&'); writer.write(entityContent); writer.write(';'); } else { writer.write(entityValue); } i = semiColonIdx; // move index up to the semi-colon } else { writer.write(c); } } } } hpricot-0.8.6/ext/hpricot_scan/0000755000175000017500000000000011710073440016024 5ustar boutilboutilhpricot-0.8.6/ext/hpricot_scan/hpricot_scan.rl0000644000175000017500000006741311710073440021052 0ustar boutilboutil/* * hpricot_scan.rl * * $Author: why $ * $Date: 2006-05-08 22:03:50 -0600 (Mon, 08 May 2006) $ * * Copyright (C) 2006, 2010 why the lucky stiff */ #include #include struct hpricot_struct { int len; VALUE* ptr; }; #ifndef RARRAY_LEN #define RARRAY_LEN(arr) RARRAY(arr)->len #define RSTRING_LEN(str) RSTRING(str)->len #define RSTRING_PTR(str) RSTRING(str)->ptr #endif VALUE hpricot_css(VALUE, VALUE, VALUE, VALUE, VALUE); #define NO_WAY_SERIOUSLY "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!" 
/* Symbols for token kinds and struct member names; every one is assigned in Init_hpricot_scan(). */
static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
      sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno,
      sym_allowed, sym_text, sym_children, sym_EMPTY, sym_CDATA;
static VALUE mHpricot, rb_eHpricotParseError;
static VALUE cBogusETag, cCData, cComment, cDoc, cDocType, cElem, cText, cXMLDecl, cProcIns, symAllow, symDeny;
static ID s_ElementContent;
static ID s_downcase, s_new, s_parent, s_read, s_to_str;
static VALUE reProcInsParse;

/* Slot indices into the VALUE array held by a struct hpricot_struct. */
#define H_ELE_TAG      0
#define H_ELE_PARENT   1
#define H_ELE_ATTR     2
#define H_ELE_ETAG     3
#define H_ELE_RAW      4
#define H_ELE_EC       5
#define H_ELE_HASH     6
#define H_ELE_CHILDREN 7

/* Raw slot access. No bounds check is performed: idx must be below the struct's len. */
#define HSTRUCT_PTR(ele) ((struct hpricot_struct*)DATA_PTR(ele))->ptr
#define H_ELE_GET(ele, idx)      HSTRUCT_PTR(ele)[idx]
#define H_ELE_SET(ele, idx, val) HSTRUCT_PTR(ele)[idx] = val

/* True when opts is non-nil and opts[:key] is truthy; key is stringified by the preprocessor. */
#define OPT(opts, key) (!NIL_P(opts) && RTEST(rb_hash_aref(opts, ID2SYM(rb_intern("" # key)))))

/*
 * Encoding support. Both macros expand to nothing without HAVE_RUBY_ENCODING_H.
 * They rely on a variable named encoding_index being in scope at the point of use.
 * NOTE(review): the header name after #include is missing in this copy; restore it from upstream.
 */
#ifdef HAVE_RUBY_ENCODING_H
#include
# define ASSOCIATE_INDEX(s)  rb_enc_associate_index((s), encoding_index)
# define ENCODING_INDEX      , encoding_index
#else
# define ASSOCIATE_INDEX(s)
# define ENCODING_INDEX
#endif

/*
 * Emit one token of kind sym_<N>. Uses the caller's locals ts, te, text, ele_open,
 * tag, attr, taint and S. The raw source span ts..te is captured for every kind
 * except cdata, text, procins and comment. With a block the token is yielded;
 * otherwise it is handed to rb_hpricot_token() to build the tree.
 * NOTE(review): in block mode raw_string is built but Qnil is what gets passed to
 * rb_yield_tokens(); confirm whether the raw string was meant to be yielded.
 */
#define ELE(N) \
  if (te > ts || text == 1) { \
    char *raw = NULL; \
    int rawlen = 0; \
    ele_open = 0; text = 0; \
    if (ts != 0 && sym_##N != sym_cdata && sym_##N != sym_text && sym_##N != sym_procins && sym_##N != sym_comment) { \
      raw = ts; rawlen = te - ts; \
    } \
    if (rb_block_given_p()) { \
      VALUE raw_string = Qnil; \
      if (raw != NULL) { \
        raw_string = rb_str_new(raw, rawlen); \
        ASSOCIATE_INDEX(raw_string); \
      } \
      rb_yield_tokens(sym_##N, tag, attr, Qnil, taint); \
    } else \
      rb_hpricot_token(S, sym_##N, tag, attr, raw, rawlen, taint ENCODING_INDEX); \
  }

/*
 * Set N to a new string covering mark_<N>..E. An unset mark or an empty span gives "".
 * If E lies before the mark, N is left untouched.
 */
#define SET(N, E) \
  if (mark_##N == NULL || E == mark_##N) { \
    N = rb_str_new2(""); \
    ASSOCIATE_INDEX(N); \
  } else if (E > mark_##N) { \
    N = rb_str_new(mark_##N, E - mark_##N); \
    ASSOCIATE_INDEX(N); \
  }

/* Append mark_<N>..E to N, creating N via SET when it is still nil. */
#define CAT(N, E) if (NIL_P(N)) { SET(N, E); } else { rb_str_cat(N, mark_##N, E - mark_##N); }

/* Rebase mark_<N> from an offset relative to ts to the same offset relative to buf; marks at or before ts are left alone. */
#define SLIDE(N) if (mark_##N > ts) mark_##N = buf + (mark_##N - ts);

/* Store K => V in the caller's attr hash, creating the hash on first use; a nil K is ignored. */
#define ATTR(K, V) \
  if (!NIL_P(K)) { \
    if (NIL_P(attr)) attr = rb_hash_new(); \
    rb_hash_aset(attr, K, V); \
  }

/*
 * Enter text mode if not already in it. The text mark starts at ts when an element
 * was open (and ts is set), otherwise at the current position p. tag and attr are reset.
 */
#define TEXT_PASS() \
  if (text == 0) \
  { \
    if (ele_open == 1) { \
      ele_open = 0; \
      if (ts > 0) { \
        mark_tag = ts; \
      } \
    } else { \
      mark_tag = p; \
    } \
    attr = Qnil; \
    tag = Qnil; \
    text = 1; \
  }

/* Append text up to (p - T + 1) to tag, then emit a token of kind N. */
#define EBLK(N, T) CAT(tag, p - T + 1); ELE(N);

/* Ragel actions for the scanner; the grammar itself is included from hpricot_common.rl. */
%%{
  machine hpricot_scan;

  action newEle {
    /* flush any pending text before starting a new element */
    if (text == 1) {
      CAT(tag, p);
      ELE(text);
      text = 0;
    }
    attr = Qnil;
    tag = Qnil;
    mark_tag = NULL;
    ele_open = 1;
  }

  action _tag { mark_tag = p; }
  action _aval { mark_aval = p; }
  action _akey { mark_akey = p; }
  action tag { SET(tag, p); }
  action tagc { SET(tag, p-1); }
  action aval { SET(aval, p); }
  action aunq {
    /* drop a trailing quote character from the value if one is present */
    if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); }
    else { SET(aval, p); }
  }
  action akey { SET(akey, p); }
  action xmlver { SET(aval, p); ATTR(ID2SYM(rb_intern("version")), aval); }
  action xmlenc {
#ifdef HAVE_RUBY_ENCODING_H
    if (mark_aval < p) {
      /* temporarily NUL-terminate the value in place so it can be passed as a C string, then restore the byte */
      char psave = *p;
      *p = '\0';
      encoding_index = rb_enc_find_index(mark_aval);
      *p = psave;
    }
#endif
    SET(aval, p);
    ATTR(ID2SYM(rb_intern("encoding")), aval);
  }
  action xmlsd { SET(aval, p); ATTR(ID2SYM(rb_intern("standalone")), aval); }
  action pubid { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); }
  action sysid { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); }

  action new_attr {
    akey = Qnil;
    aval = Qnil;
    mark_akey = NULL;
    mark_aval = NULL;
  }

  action save_attr {
    /* attribute names are downcased unless parsing in xml mode */
    if (!S->xml && !NIL_P(akey))
      akey = rb_funcall(akey, s_downcase, 0);
    ATTR(akey, aval);
  }

  include hpricot_common "hpricot_common.rl";

}%%

%% write data nofinal;

/* Default read-buffer size and the step by which hpricot_scan() grows the buffer. */
#define BUFSIZE 16384

/*
 * Yield one token to the block as the array [sym, tag, attr, raw].
 * For text tokens raw is replaced by tag. When taint is non-zero the array
 * and its tag, attr and raw members are marked tainted before yielding.
 */
void rb_yield_tokens(VALUE sym, VALUE tag, VALUE attr, VALUE raw, int taint)
{
  VALUE ary;
  if (sym == sym_text) {
    raw = tag;
  }
  ary = rb_ary_new3(4, sym, tag, attr, raw);
  if (taint) {
    OBJ_TAINT(ary);
    OBJ_TAINT(tag);
    OBJ_TAINT(attr);
    OBJ_TAINT(raw);
  }
  rb_yield(ary);
}

#ifndef RHASH_TBL
/*
 * rb_hash_lookup() is only in Ruby 1.8.7.
 * Fallback used when RHASH_TBL is not defined: look the key up directly in the
 * hash's st table and return Qnil on a miss.
 */
static VALUE
our_rb_hash_lookup(VALUE hash, VALUE key)
{
  VALUE val;

  if (!st_lookup(RHASH(hash)->tbl, key, &val)) {
    return Qnil; /* without Hash#default */
  }

  return val;
}
#define rb_hash_lookup our_rb_hash_lookup
#endif

/*
 * Append ele to focus's children array (created on first use) and record
 * focus as ele's parent.
 */
static void
rb_hpricot_add(VALUE focus, VALUE ele)
{
  VALUE children = H_ELE_GET(focus, H_ELE_CHILDREN);
  if (NIL_P(children))
    H_ELE_SET(focus, H_ELE_CHILDREN, (children = rb_ary_new2(1)));
  rb_ary_push(children, ele);
  H_ELE_SET(ele, H_ELE_PARENT, focus);
}

/* Tree-building state, allocated by hpricot_scan() when no block is given. */
typedef struct {
  VALUE doc;    /* the Hpricot::Doc being built */
  VALUE focus;  /* node that new children are currently added to */
  VALUE last;   /* most recently created node, or Qnil; used to merge adjacent text */
  VALUE EC;     /* Hpricot::ElementContent, looked up by tag name */
  unsigned char xml, strict, fixup;  /* flags from the :xml, :xhtml_strict and :fixup_tags options */
} hpricot_state;

/* Generates hpricot_ele_set_<prop>, hpricot_ele_clear_<prop> and hpricot_ele_get_<prop> for struct slot idx. */
#define H_PROP(prop, idx) \
  static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
    H_ELE_SET(self, idx, x); \
    return self; \
  } \
  static VALUE hpricot_ele_clear_##prop(VALUE self) { \
    H_ELE_SET(self, idx, Qnil); \
    return Qtrue; \
  } \
  static VALUE hpricot_ele_get_##prop(VALUE self) { \
    return H_ELE_GET(self, idx); \
  }

/* Generates hpricot_ele_set_<prop> / hpricot_ele_get_<prop>, which access key :<prop> of the hash in the H_ELE_ATTR slot. */
#define H_ATTR(prop) \
  static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
    rb_hash_aset(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop)), x); \
    return self; \
  } \
  static VALUE hpricot_ele_get_##prop(VALUE self) { \
    return rb_hash_aref(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop))); \
  }

H_PROP(name, H_ELE_TAG);
H_PROP(raw, H_ELE_RAW);
H_PROP(parent, H_ELE_PARENT);
H_PROP(attr, H_ELE_ATTR);
H_PROP(etag, H_ELE_ETAG);
H_PROP(children, H_ELE_CHILDREN);
H_ATTR(target);
H_ATTR(encoding);
H_ATTR(version);
H_ATTR(standalone);
H_ATTR(system_id);
H_ATTR(public_id);

/*
 * Allocate a node of class klass into the caller's `ele` and fill its slots from
 * the caller's locals (tag, attr, ec, raw, rawlen, sym, S):
 *   - cElem:      TAG, ATTR and EC, plus RAW for emptytag/stag/doctype tokens with a raw span;
 *   - cBogusETag: TAG, with the raw text stored in the ATTR slot;
 *   - cDocType / cXMLDecl: ATTR (cDocType first adds :target => tag), with the raw text stored in the TAG slot;
 *   - cProcIns:   ATTR and TAG as given;
 *   - any other class: TAG only.
 * The new node is always recorded in S->last. Note that the macro may reassign tag and attr.
 */
#define H_ELE(klass) \
  ele = rb_obj_alloc(klass); \
  if (klass == cElem) { \
    H_ELE_SET(ele, H_ELE_TAG, tag); \
    H_ELE_SET(ele, H_ELE_ATTR, attr); \
    H_ELE_SET(ele, H_ELE_EC, ec); \
    if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_doctype)) { \
      VALUE raw_str = rb_str_new(raw, rawlen); \
      ASSOCIATE_INDEX(raw_str); \
      H_ELE_SET(ele, H_ELE_RAW, raw_str); \
    } \
  } else if (klass == cDocType || klass == cProcIns || klass == cXMLDecl || klass == cBogusETag) { \
    if (klass == cBogusETag) { \
      H_ELE_SET(ele, H_ELE_TAG, tag); \
      if (raw != NULL) { \
        VALUE raw_str = rb_str_new(raw, rawlen); \
        ASSOCIATE_INDEX(raw_str); \
        H_ELE_SET(ele, H_ELE_ATTR, raw_str); \
      } \
    } else { \
      if (klass == cDocType) \
        ATTR(ID2SYM(rb_intern("target")), tag); \
      H_ELE_SET(ele, H_ELE_ATTR, attr); \
      if (klass != cProcIns) { \
        tag = Qnil; \
        if (raw != NULL) { \
          tag = rb_str_new(raw, rawlen); \
          ASSOCIATE_INDEX(tag); \
        } \
      } \
      H_ELE_SET(ele, H_ELE_TAG, tag); \
    } \
  } else { \
    H_ELE_SET(ele, H_ELE_TAG, tag); \
  } \
  S->last = ele

//
// the swift, compact parser logic. most of the complicated stuff is done
// in the lexer. this step just pairs up the start and end tags.
//
// S is the tree-building state, sym the token kind, tag/attr the parsed name and
// attribute hash, raw/rawlen the token's source text (raw may be NULL), and
// encoding_index (when compiled in) is what ASSOCIATE_INDEX applies to new strings.
//
void
rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw, int rawlen, int taint
#ifdef HAVE_RUBY_ENCODING_H
, int encoding_index
#endif
)
{
  VALUE ele, ec = Qnil;

  //
  // in html mode, fix up start tags incorrectly formed as empty tags
  //
  if (!S->xml) {
    // look the tag up in ElementContent as given, then downcased
    if (sym == sym_emptytag || sym == sym_stag || sym == sym_etag) {
      ec = rb_hash_aref(S->EC, tag);
      if (NIL_P(ec)) {
        tag = rb_funcall(tag, s_downcase, 0);
        ec = rb_hash_aref(S->EC, tag);
      }
    }

    // while the focused element's content model is CDATA, every markup token
    // except that element's own end tag is turned back into text
    if (H_ELE_GET(S->focus, H_ELE_EC) == sym_CDATA &&
       (sym != sym_procins && sym != sym_comment && sym != sym_cdata && sym != sym_text) &&
      !(sym == sym_etag && INT2FIX(rb_str_hash(tag)) == H_ELE_GET(S->focus, H_ELE_HASH)))
    {
      sym = sym_text;
      tag = rb_str_new(raw, rawlen);
      ASSOCIATE_INDEX(tag);
    }

    // known elements: make the token kind agree with whether the element is EMPTY
    if (!NIL_P(ec)) {
      if (sym == sym_emptytag) {
        if (ec != sym_EMPTY)
          sym = sym_stag;
      } else if (sym == sym_stag) {
        if (ec == sym_EMPTY)
          sym = sym_emptytag;
      }
    }
  }

  if (sym == sym_emptytag || sym == sym_stag) {
    // elements are matched by the hash of their tag name, stored in H_ELE_HASH
    VALUE name = INT2FIX(rb_str_hash(tag));
    H_ELE(cElem);
    H_ELE_SET(ele, H_ELE_HASH, name);

    if (!S->xml) {
      // walk from the focus up to the document looking for an ancestor that
      // may contain this element; that ancestor becomes the new focus
      VALUE match = Qnil, e = S->focus;
      while (e != S->doc) {
        if (ec == Qnil) {
          // anything can contain unknown elements
          if (match == Qnil)
            match = e;
        } else {
          VALUE hEC = H_ELE_GET(e, H_ELE_EC);

          if (TYPE(hEC) == T_HASH)
          {
            VALUE has = rb_hash_lookup(hEC, name);
            if (has != Qnil) {
              if (has == Qtrue) {
                if (match == Qnil)
                  match = e;
              } else if (has == symAllow) {
                match = S->focus;
              } else if (has == symDeny) {
                match = Qnil;
              }
            }
          } else {
            // Unknown elements can contain anything
            if (match == Qnil)
              match = e;
          }
        }
        e = H_ELE_GET(e, H_ELE_PARENT);
      }

      if (match == Qnil)
        match = S->focus;
      S->focus = match;
    }

    rb_hpricot_add(S->focus, ele);

    //
    // in the case of a start tag that should be empty, just
    // skip the step that focuses the element. focusing moves
    // us deeper into the document.
    //
    if (sym == sym_stag) {
      if (S->xml || ec != sym_EMPTY) {
        S->focus = ele;
        S->last = Qnil;
      }
    }
  } else if (sym == sym_etag) {
    VALUE name, match = Qnil, e = S->focus;
    // in strict mode an end tag unknown to ElementContent is treated as </div>
    if (S->strict) {
      if (NIL_P(rb_hash_aref(S->EC, tag))) {
        tag = rb_str_new2("div");
        ASSOCIATE_INDEX(tag);
      }
    }

    //
    // another optimization will be to improve this very simple
    // O(n) tag search, where n is the depth of the focused tag.
    //
    // (see also: the search above for fixups)
    //
    name = INT2FIX(rb_str_hash(tag));
    while (e != S->doc) {
      if (H_ELE_GET(e, H_ELE_HASH) == name) {
        match = e;
        break;
      }
      e = H_ELE_GET(e, H_ELE_PARENT);
    }

    if (NIL_P(match)) {
      // no open element with this name: keep the end tag as a BogusETag node
      H_ELE(cBogusETag);
      rb_hpricot_add(S->focus, ele);
    } else {
      // close the matched element: store the raw end tag and refocus on its parent
      VALUE ele = Qnil;
      if (raw != NULL) {
        ele = rb_str_new(raw, rawlen);
        ASSOCIATE_INDEX(ele);
      }
      H_ELE_SET(match, H_ELE_ETAG, ele);
      S->focus = H_ELE_GET(match, H_ELE_PARENT);
      S->last = Qnil;
    }
  } else if (sym == sym_cdata) {
    H_ELE(cCData);
    rb_hpricot_add(S->focus, ele);
  } else if (sym == sym_comment) {
    H_ELE(cComment);
    rb_hpricot_add(S->focus, ele);
  } else if (sym == sym_doctype) {
    H_ELE(cDocType);
    // strict mode overwrites the doctype's system and public ids with the XHTML 1.0 Strict ones
    if (S->strict) {
      VALUE id;
      id = rb_str_new2("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
      ASSOCIATE_INDEX(id);
      rb_hash_aset(attr, ID2SYM(rb_intern("system_id")), id);
      id = rb_str_new2("-//W3C//DTD XHTML 1.0 Strict//EN");
      ASSOCIATE_INDEX(id);
      rb_hash_aset(attr, ID2SYM(rb_intern("public_id")), id);
    }
    rb_hpricot_add(S->focus, ele);
  } else if (sym == sym_procins) {
VALUE match = rb_funcall(tag, rb_intern("match"), 1, reProcInsParse); tag = rb_reg_nth_match(1, match); attr = rb_reg_nth_match(2, match); { H_ELE(cProcIns); rb_hpricot_add(S->focus, ele); } } else if (sym == sym_text) { // TODO: add raw_string as well? if (!NIL_P(S->last) && RTEST(rb_obj_is_instance_of(S->last, cText))) { rb_str_append(H_ELE_GET(S->last, H_ELE_TAG), tag); } else { H_ELE(cText); rb_hpricot_add(S->focus, ele); } } else if (sym == sym_xmldecl) { H_ELE(cXMLDecl); rb_hpricot_add(S->focus, ele); } } VALUE hpricot_scan(int argc, VALUE *argv, VALUE self) { int cs, act, have = 0, nread = 0, curline = 1, text = 0, io = 0; char *ts = 0, *te = 0, *buf = NULL, *eof = NULL; hpricot_state *S = NULL; VALUE port, opts; VALUE attr = Qnil, tag = Qnil, akey = Qnil, aval = Qnil, bufsize = Qnil; char *mark_tag = 0, *mark_akey = 0, *mark_aval = 0; int done = 0, ele_open = 0, buffer_size = 0, taint = 0; #ifdef HAVE_RUBY_ENCODING_H int encoding_index = rb_enc_to_index(rb_default_external_encoding()); #endif rb_scan_args(argc, argv, "11", &port, &opts); taint = OBJ_TAINTED(port); io = rb_respond_to(port, s_read); if (!io) { if (rb_respond_to(port, s_to_str)) { port = rb_funcall(port, s_to_str, 0); StringValue(port); } else { rb_raise(rb_eArgError, "an Hpricot document must be built from an input source (a String or IO object.)"); } } if (TYPE(opts) != T_HASH) opts = Qnil; if (!rb_block_given_p()) { S = ALLOC(hpricot_state); S->doc = rb_obj_alloc(cDoc); rb_gc_register_address(&S->doc); S->focus = S->doc; S->last = Qnil; S->xml = OPT(opts, xml); S->strict = OPT(opts, xhtml_strict); S->fixup = OPT(opts, fixup_tags); if (S->strict) S->fixup = 1; rb_ivar_set(S->doc, rb_intern("@options"), opts); S->EC = rb_const_get(mHpricot, s_ElementContent); } buffer_size = BUFSIZE; if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) { bufsize = rb_ivar_get(self, rb_intern("@buffer_size")); if (!NIL_P(bufsize)) { buffer_size = NUM2INT(bufsize); } } if (io) buf = ALLOC_N(char, 
buffer_size); %% write init; while (!done) { VALUE str; char *p, *pe; int len, space = buffer_size - have, tokstart_diff, tokend_diff, mark_tag_diff, mark_akey_diff, mark_aval_diff; if (io) { if (space == 0) { /* We've used up the entire buffer storing an already-parsed token * prefix that must be preserved. Likely caused by super-long attributes. * Increase buffer size and continue */ tokstart_diff = ts - buf; tokend_diff = te - buf; mark_tag_diff = mark_tag - buf; mark_akey_diff = mark_akey - buf; mark_aval_diff = mark_aval - buf; buffer_size += BUFSIZE; REALLOC_N(buf, char, buffer_size); space = buffer_size - have; ts = buf + tokstart_diff; te = buf + tokend_diff; mark_tag = buf + mark_tag_diff; mark_akey = buf + mark_akey_diff; mark_aval = buf + mark_aval_diff; } p = buf + have; str = rb_funcall(port, s_read, 1, INT2FIX(space)); len = RSTRING_LEN(str); memcpy(p, StringValuePtr(str), len); } else { p = RSTRING_PTR(port); len = RSTRING_LEN(port) + 1; done = 1; } nread += len; /* If this is the last buffer, tack on an EOF. 
*/ if (io && len < space) { p[len++] = 0; done = 1; } pe = p + len; %% write exec; if (cs == hpricot_scan_error) { if (buf != NULL) free(buf); if (!NIL_P(tag)) { rb_raise(rb_eHpricotParseError, "parse error on element <%s>, starting on line %d.\n" NO_WAY_SERIOUSLY, RSTRING_PTR(tag), curline); } else { rb_raise(rb_eHpricotParseError, "parse error on line %d.\n" NO_WAY_SERIOUSLY, curline); } } if (done && ele_open) { ele_open = 0; if (ts > 0) { mark_tag = ts; ts = 0; text = 1; } } if (ts == 0) { have = 0; /* text nodes have no ts because each byte is parsed alone */ if (mark_tag != NULL && text == 1) { if (done) { if (mark_tag < p-1) { CAT(tag, p-1); ELE(text); } } else { CAT(tag, p); } } if (io) mark_tag = buf; else mark_tag = RSTRING_PTR(port); } else if (io) { have = pe - ts; memmove(buf, ts, have); SLIDE(tag); SLIDE(akey); SLIDE(aval); te = buf + (te - ts); ts = buf; } } if (buf != NULL) free(buf); if (S != NULL) { VALUE doc = S->doc; rb_gc_unregister_address(&S->doc); free(S); return doc; } return Qnil; } void hstruct_mark(void* ptr) { struct hpricot_struct* st = (struct hpricot_struct*)ptr; int i; /* it's likely to hit GC when allocating st->ptr. * that should be checked to avoid segfault. * and simply ignore it. 
*/ if (st->ptr) { for(i = 0; i < st->len; i++) { rb_gc_mark(st->ptr[i]); } } } void hstruct_free(void* ptr) { struct hpricot_struct* st = (struct hpricot_struct*)ptr; free(st->ptr); free(st); } static VALUE alloc_hpricot_struct8(VALUE klass) { VALUE obj; struct hpricot_struct* st; obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st); st->len = 8; st->ptr = ALLOC_N(VALUE, 8); rb_mem_clear(st->ptr, 8); return obj; } static VALUE alloc_hpricot_struct2(VALUE klass) { VALUE obj; struct hpricot_struct* st; obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st); st->len = 2; st->ptr = ALLOC_N(VALUE, 2); rb_mem_clear(st->ptr, 2); return obj; } static VALUE alloc_hpricot_struct3(VALUE klass) { VALUE obj; struct hpricot_struct* st; obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st); st->len = 3; st->ptr = ALLOC_N(VALUE, 3); rb_mem_clear(st->ptr, 3); return obj; } static VALUE hpricot_struct_ref0(VALUE obj) {return H_ELE_GET(obj, 0);} static VALUE hpricot_struct_ref1(VALUE obj) {return H_ELE_GET(obj, 1);} static VALUE hpricot_struct_ref2(VALUE obj) {return H_ELE_GET(obj, 2);} static VALUE hpricot_struct_ref3(VALUE obj) {return H_ELE_GET(obj, 3);} static VALUE hpricot_struct_ref4(VALUE obj) {return H_ELE_GET(obj, 4);} static VALUE hpricot_struct_ref5(VALUE obj) {return H_ELE_GET(obj, 5);} static VALUE hpricot_struct_ref6(VALUE obj) {return H_ELE_GET(obj, 6);} static VALUE hpricot_struct_ref7(VALUE obj) {return H_ELE_GET(obj, 7);} static VALUE hpricot_struct_ref8(VALUE obj) {return H_ELE_GET(obj, 8);} static VALUE hpricot_struct_ref9(VALUE obj) {return H_ELE_GET(obj, 9);} static VALUE (*ref_func[10])() = { hpricot_struct_ref0, hpricot_struct_ref1, hpricot_struct_ref2, hpricot_struct_ref3, hpricot_struct_ref4, hpricot_struct_ref5, hpricot_struct_ref6, hpricot_struct_ref7, hpricot_struct_ref8, hpricot_struct_ref9, }; static VALUE hpricot_struct_set0(VALUE obj, VALUE val) {return 
H_ELE_SET(obj, 0, val);} static VALUE hpricot_struct_set1(VALUE obj, VALUE val) {return H_ELE_SET(obj, 1, val);} static VALUE hpricot_struct_set2(VALUE obj, VALUE val) {return H_ELE_SET(obj, 2, val);} static VALUE hpricot_struct_set3(VALUE obj, VALUE val) {return H_ELE_SET(obj, 3, val);} static VALUE hpricot_struct_set4(VALUE obj, VALUE val) {return H_ELE_SET(obj, 4, val);} static VALUE hpricot_struct_set5(VALUE obj, VALUE val) {return H_ELE_SET(obj, 5, val);} static VALUE hpricot_struct_set6(VALUE obj, VALUE val) {return H_ELE_SET(obj, 6, val);} static VALUE hpricot_struct_set7(VALUE obj, VALUE val) {return H_ELE_SET(obj, 7, val);} static VALUE hpricot_struct_set8(VALUE obj, VALUE val) {return H_ELE_SET(obj, 8, val);} static VALUE hpricot_struct_set9(VALUE obj, VALUE val) {return H_ELE_SET(obj, 9, val);} static VALUE (*set_func[10])() = { hpricot_struct_set0, hpricot_struct_set1, hpricot_struct_set2, hpricot_struct_set3, hpricot_struct_set4, hpricot_struct_set5, hpricot_struct_set6, hpricot_struct_set7, hpricot_struct_set8, hpricot_struct_set9, }; static VALUE make_hpricot_struct(VALUE members, VALUE (*alloc)(VALUE klass)) { int i = 0; char attr_set[128]; VALUE klass = rb_class_new(rb_cObject); rb_define_alloc_func(klass, alloc); int len = RARRAY_LEN(members); assert(len < 10); for (i = 0; i < len; i++) { ID id = SYM2ID(rb_ary_entry(members, i)); const char* name = rb_id2name(id); int len = strlen(name); memcpy(attr_set, name, strlen(name)); attr_set[len] = '='; attr_set[len+1] = 0; rb_define_method(klass, name, ref_func[i], 0); rb_define_method(klass, attr_set, set_func[i], 1); } return klass; } void Init_hpricot_scan() { VALUE structElem, structAttr, structBasic; s_ElementContent = rb_intern("ElementContent"); symAllow = ID2SYM(rb_intern("allow")); symDeny = ID2SYM(rb_intern("deny")); s_downcase = rb_intern("downcase"); s_new = rb_intern("new"); s_parent = rb_intern("parent"); s_read = rb_intern("read"); s_to_str = rb_intern("to_str"); sym_xmldecl = 
ID2SYM(rb_intern("xmldecl")); sym_doctype = ID2SYM(rb_intern("doctype")); sym_procins = ID2SYM(rb_intern("procins")); sym_stag = ID2SYM(rb_intern("stag")); sym_etag = ID2SYM(rb_intern("etag")); sym_emptytag = ID2SYM(rb_intern("emptytag")); sym_allowed = ID2SYM(rb_intern("allowed")); sym_children = ID2SYM(rb_intern("children")); sym_comment = ID2SYM(rb_intern("comment")); sym_cdata = ID2SYM(rb_intern("cdata")); sym_name = ID2SYM(rb_intern("name")); sym_parent = ID2SYM(rb_intern("parent")); sym_raw_attributes = ID2SYM(rb_intern("raw_attributes")); sym_raw_string = ID2SYM(rb_intern("raw_string")); sym_tagno = ID2SYM(rb_intern("tagno")); sym_text = ID2SYM(rb_intern("text")); sym_EMPTY = ID2SYM(rb_intern("EMPTY")); sym_CDATA = ID2SYM(rb_intern("CDATA")); mHpricot = rb_define_module("Hpricot"); rb_define_attr(rb_singleton_class(mHpricot), "buffer_size", 1, 1); rb_define_singleton_method(mHpricot, "scan", hpricot_scan, -1); rb_define_singleton_method(mHpricot, "css", hpricot_css, 3); rb_eHpricotParseError = rb_define_class_under(mHpricot, "ParseError", rb_eStandardError); structElem = make_hpricot_struct(rb_ary_new3(8, sym_name, sym_parent, sym_raw_attributes, sym_etag, sym_raw_string, sym_allowed, sym_tagno, sym_children), alloc_hpricot_struct8); structAttr = make_hpricot_struct( rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes), alloc_hpricot_struct3); structBasic = make_hpricot_struct( rb_ary_new3(2, sym_name, sym_parent), alloc_hpricot_struct2); cDoc = rb_define_class_under(mHpricot, "Doc", structElem); cCData = rb_define_class_under(mHpricot, "CData", structBasic); rb_define_method(cCData, "content", hpricot_ele_get_name, 0); rb_define_method(cCData, "content=", hpricot_ele_set_name, 1); cComment = rb_define_class_under(mHpricot, "Comment", structBasic); rb_define_method(cComment, "content", hpricot_ele_get_name, 0); rb_define_method(cComment, "content=", hpricot_ele_set_name, 1); cDocType = rb_define_class_under(mHpricot, "DocType", structAttr); 
rb_define_method(cDocType, "raw_string", hpricot_ele_get_name, 0); rb_define_method(cDocType, "clear_raw", hpricot_ele_clear_name, 0); rb_define_method(cDocType, "target", hpricot_ele_get_target, 0); rb_define_method(cDocType, "target=", hpricot_ele_set_target, 1); rb_define_method(cDocType, "public_id", hpricot_ele_get_public_id, 0); rb_define_method(cDocType, "public_id=", hpricot_ele_set_public_id, 1); rb_define_method(cDocType, "system_id", hpricot_ele_get_system_id, 0); rb_define_method(cDocType, "system_id=", hpricot_ele_set_system_id, 1); cElem = rb_define_class_under(mHpricot, "Elem", structElem); rb_define_method(cElem, "clear_raw", hpricot_ele_clear_raw, 0); cBogusETag = rb_define_class_under(mHpricot, "BogusETag", structAttr); rb_define_method(cBogusETag, "raw_string", hpricot_ele_get_attr, 0); rb_define_method(cBogusETag, "clear_raw", hpricot_ele_clear_attr, 0); cText = rb_define_class_under(mHpricot, "Text", structBasic); rb_define_method(cText, "raw_string", hpricot_ele_get_name, 0); rb_define_method(cText, "clear_raw", hpricot_ele_clear_name, 0); rb_define_method(cText, "content", hpricot_ele_get_name, 0); rb_define_method(cText, "content=", hpricot_ele_set_name, 1); cXMLDecl = rb_define_class_under(mHpricot, "XMLDecl", structAttr); rb_define_method(cXMLDecl, "raw_string", hpricot_ele_get_name, 0); rb_define_method(cXMLDecl, "clear_raw", hpricot_ele_clear_name, 0); rb_define_method(cXMLDecl, "encoding", hpricot_ele_get_encoding, 0); rb_define_method(cXMLDecl, "encoding=", hpricot_ele_set_encoding, 1); rb_define_method(cXMLDecl, "standalone", hpricot_ele_get_standalone, 0); rb_define_method(cXMLDecl, "standalone=", hpricot_ele_set_standalone, 1); rb_define_method(cXMLDecl, "version", hpricot_ele_get_version, 0); rb_define_method(cXMLDecl, "version=", hpricot_ele_set_version, 1); cProcIns = rb_define_class_under(mHpricot, "ProcIns", structAttr); rb_define_method(cProcIns, "target", hpricot_ele_get_name, 0); rb_define_method(cProcIns, "target=", 
hpricot_ele_set_name, 1); rb_define_method(cProcIns, "content", hpricot_ele_get_attr, 0); rb_define_method(cProcIns, "content=", hpricot_ele_set_attr, 1); rb_const_set(mHpricot, rb_intern("ProcInsParse"), reProcInsParse = rb_eval_string("/\\A<\\?(\\S+)\\s+(.+)/m")); } hpricot-0.8.6/ext/hpricot_scan/hpricot_scan.java.rl0000644000175000017500000012630111710073440021762 0ustar boutilboutil import java.io.IOException; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyHash; import org.jruby.RubyModule; import org.jruby.RubyNumeric; import org.jruby.RubyObject; import org.jruby.RubyObjectAdapter; import org.jruby.RubyRegexp; import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; import org.jruby.javasupport.JavaEmbedUtils; import org.jruby.runtime.Arity; import org.jruby.runtime.Block; import org.jruby.runtime.ObjectAllocator; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.runtime.callback.Callback; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.load.BasicLibraryService; import org.jruby.util.ByteList; public class HpricotScanService implements BasicLibraryService { public static byte[] realloc(byte[] input, int size) { byte[] newArray = new byte[size]; System.arraycopy(input, 0, newArray, 0, input.length); return newArray; } // hpricot_state public static class State { public IRubyObject doc; public IRubyObject focus; public IRubyObject last; public IRubyObject EC; public boolean xml, strict, fixup; } static boolean OPT(IRubyObject opts, String key) { Ruby runtime = opts.getRuntime(); return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue(); } // H_PROP(name, H_ELE_TAG) public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_TAG, x); return self; } public static IRubyObject 
hpricot_ele_clear_name(IRubyObject self) { H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_name(IRubyObject self) { return H_ELE_GET(self, H_ELE_TAG); } // H_PROP(raw, H_ELE_RAW) public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_RAW, x); return self; } public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) { H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_raw(IRubyObject self) { return H_ELE_GET(self, H_ELE_RAW); } // H_PROP(parent, H_ELE_PARENT) public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_PARENT, x); return self; } public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) { H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_parent(IRubyObject self) { return H_ELE_GET(self, H_ELE_PARENT); } // H_PROP(attr, H_ELE_ATTR) public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_ATTR, x); return self; } public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) { H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_attr(IRubyObject self) { return H_ELE_GET(self, H_ELE_ATTR); } // H_PROP(etag, H_ELE_ETAG) public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_ETAG, x); return self; } public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) { H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_etag(IRubyObject self) { return H_ELE_GET(self, H_ELE_ETAG); } // H_PROP(children, H_ELE_CHILDREN) public 
static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_CHILDREN, x); return self; } public static IRubyObject hpricot_ele_clear_children(IRubyObject self) { H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_children(IRubyObject self) { return H_ELE_GET(self, H_ELE_CHILDREN); } // H_ATTR(target) public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) { H_ELE_GET_asHash(self, H_ELE_ATTR).fastASet(self.getRuntime().newSymbol("target"), x); return self; } public static IRubyObject hpricot_ele_get_target(IRubyObject self) { return H_ELE_GET_asHash(self, H_ELE_ATTR).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target")); } // H_ATTR(encoding) public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x); return self; } public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding")); } // H_ATTR(version) public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x); return self; } public static IRubyObject hpricot_ele_get_version(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version")); } // H_ATTR(standalone) public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x); return self; } public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) { return 
((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone")); } // H_ATTR(system_id) public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x); return self; } public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id")); } // H_ATTR(public_id) public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x); return self; } public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id")); } public static class Scanner { public IRubyObject SET(int mark, int E, IRubyObject org) { if(mark == -1 || E == mark) { return runtime.newString(""); } else if(E > mark) { return RubyString.newString(runtime, data, mark, E-mark); } else { return org; } } public int SLIDE(int N) { if(N > ts) { return N - ts; } else { return N; } } public IRubyObject CAT(IRubyObject N, int mark, int E) { if(N.isNil()) { return SET(mark, E, N); } else { ((RubyString)N).cat(data, mark, E-mark); return N; } } public void ATTR(IRubyObject K, IRubyObject V) { if(!K.isNil()) { if(attr.isNil()) { attr = RubyHash.newHash(runtime); } ((RubyHash)attr).fastASet(K, V); } } public void TEXT_PASS() { if(!text) { if(ele_open) { ele_open = false; if(ts != -1) { mark_tag = ts; } } else { mark_tag = p; } attr = runtime.getNil(); tag = runtime.getNil(); text = true; } } public void ELE(IRubyObject N) { if(te > ts || text) { int raw = -1; int rawlen = 0; ele_open = false; text = false; if(ts != -1 
&& N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) { raw = ts; rawlen = te - ts; } if(block.isGiven()) { IRubyObject raw_string = runtime.getNil(); if(raw != -1) { raw_string = RubyString.newString(runtime, data, raw, rawlen); } yieldTokens(N, tag, attr, runtime.getNil(), taint); } else { hpricotToken(S, N, tag, attr, raw, rawlen, taint); } } } public void EBLK(IRubyObject N, int T) { tag = CAT(tag, mark_tag, p - T + 1); ELE(N); } public void hpricotAdd(IRubyObject focus, IRubyObject ele) { IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN); if(children.isNil()) { H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1)); } ((RubyArray)children).append(ele); H_ELE_SET(ele, H_ELE_PARENT, focus); } private static class TokenInfo { public IRubyObject sym; public IRubyObject tag; public IRubyObject attr; public int raw; public int rawlen; public IRubyObject ec; public IRubyObject ele; public Extra x; public Ruby runtime; public Scanner scanner; public State S; public void H_ELE(RubyClass klass) { ele = klass.allocate(); if(klass == x.cElem) { H_ELE_SET(ele, H_ELE_TAG, tag); H_ELE_SET(ele, H_ELE_ATTR, attr); H_ELE_SET(ele, H_ELE_EC, ec); if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) { H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen)); } } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) { if(klass == x.cBogusETag) { H_ELE_SET(ele, H_ELE_TAG, tag); if(raw != -1) { H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen)); } } else { if(klass == x.cDocType) { scanner.ATTR(runtime.newSymbol("target"), tag); } H_ELE_SET(ele, H_ELE_ATTR, attr); if(klass != x.cProcIns) { tag = runtime.getNil(); if(raw != -1) { tag = RubyString.newString(runtime, scanner.data, raw, rawlen); } } H_ELE_SET(ele, H_ELE_TAG, tag); } } else { H_ELE_SET(ele, H_ELE_TAG, tag); } S.last = ele; } 
public void hpricotToken(boolean taint) { // // in html mode, fix up start tags incorrectly formed as empty tags // if(!S.xml) { if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) { ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag); if(ec.isNil()) { tag = tag.callMethod(scanner.ctx, "downcase"); ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag); } } if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA && (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) && !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) { sym = x.sym_text; tag = RubyString.newString(runtime, scanner.data, raw, rawlen); } if(!ec.isNil()) { if(sym == x.sym_emptytag) { if(ec != x.sym_EMPTY) { sym = x.sym_stag; } } else if(sym == x.sym_stag) { if(ec == x.sym_EMPTY) { sym = x.sym_emptytag; } } } } if(sym == x.sym_emptytag || sym == x.sym_stag) { IRubyObject name = runtime.newFixnum(tag.hashCode()); H_ELE(x.cElem); H_ELE_SET(ele, H_ELE_HASH, name); if(!S.xml) { IRubyObject match = runtime.getNil(), e = S.focus; while(e != S.doc) { if (ec.isNil()) { // Anything can contain an unknown element if(match.isNil()) { match = e; } } else { IRubyObject hEC = H_ELE_GET(e, H_ELE_EC); if(hEC instanceof RubyHash) { IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name); if(!has.isNil()) { if(has == runtime.getTrue()) { if(match.isNil()) { match = e; } } else if(has == x.symAllow) { match = S.focus; } else if(has == x.symDeny) { match = runtime.getNil(); } } } else { // Unknown elements can contain anything if(match.isNil()) { match = e; } } } e = H_ELE_GET(e, H_ELE_PARENT); } if(match.isNil()) { match = S.focus; } S.focus = match; } scanner.hpricotAdd(S.focus, ele); // // in the case of a start tag that should be empty, just // skip the step that focuses the element. focusing moves // us deeper into the document. 
// if(sym == x.sym_stag) { if(S.xml || ec != x.sym_EMPTY) { S.focus = ele; S.last = runtime.getNil(); } } } else if(sym == x.sym_etag) { IRubyObject name, match = runtime.getNil(), e = S.focus; if(S.strict) { if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) { tag = runtime.newString("div"); } } name = runtime.newFixnum(tag.hashCode()); while(e != S.doc) { if(H_ELE_GET(e, H_ELE_HASH).equals(name)) { match = e; break; } e = H_ELE_GET(e, H_ELE_PARENT); } if(match.isNil()) { H_ELE(x.cBogusETag); scanner.hpricotAdd(S.focus, ele); } else { ele = runtime.getNil(); if(raw != -1) { ele = RubyString.newString(runtime, scanner.data, raw, rawlen); } H_ELE_SET(match, H_ELE_ETAG, ele); S.focus = H_ELE_GET(match, H_ELE_PARENT); S.last = runtime.getNil(); } } else if(sym == x.sym_cdata) { H_ELE(x.cCData); scanner.hpricotAdd(S.focus, ele); } else if(sym == x.sym_comment) { H_ELE(x.cComment); scanner.hpricotAdd(S.focus, ele); } else if(sym == x.sym_doctype) { H_ELE(x.cDocType); if(S.strict) { RubyHash h = (RubyHash)attr; h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")); h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN")); } scanner.hpricotAdd(S.focus, ele); } else if(sym == x.sym_procins) { IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse); tag = RubyRegexp.nth_match(1, match); attr = RubyRegexp.nth_match(2, match); H_ELE(x.cProcIns); scanner.hpricotAdd(S.focus, ele); } else if(sym == x.sym_text) { if(!S.last.isNil() && S.last.getType() == x.cText) { ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag); } else { H_ELE(x.cText); scanner.hpricotAdd(S.focus, ele); } } else if(sym == x.sym_xmldecl) { H_ELE(x.cXMLDecl); scanner.hpricotAdd(S.focus, ele); } } } public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) { TokenInfo t = new TokenInfo(); t.sym = _sym; t.tag = _tag; 
t.attr = _attr; t.raw = _raw; t.rawlen = _rawlen; t.ec = runtime.getNil(); t.ele = runtime.getNil(); t.x = x; t.runtime = runtime; t.scanner = this; t.S = S; t.hpricotToken(taint); } public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) { if(sym == x.sym_text) { raw = tag; } IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw}); if(taint) { ary.setTaint(true); tag.setTaint(true); attr.setTaint(true); raw.setTaint(true); } block.yield(ctx, ary); } %%{ machine hpricot_scan; action newEle { if(text) { tag = CAT(tag, mark_tag, p); ELE(x.sym_text); text = false; } attr = runtime.getNil(); tag = runtime.getNil(); mark_tag = -1; ele_open = true; } action _tag { mark_tag = p; } action _aval { mark_aval = p; } action _akey { mark_akey = p; } action tag { tag = SET(mark_tag, p, tag); } action tagc { tag = SET(mark_tag, p-1, tag); } action aval { aval = SET(mark_aval, p, aval); } action aunq { if(data[p-1] == '"' || data[p-1] == '\'') { aval = SET(mark_aval, p-1, aval); } else { aval = SET(mark_aval, p, aval); } } action akey { akey = SET(mark_akey, p, akey); } action xmlver { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); } action xmlenc { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); } action xmlsd { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); } action pubid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); } action sysid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); } action new_attr { akey = runtime.getNil(); aval = runtime.getNil(); mark_akey = -1; mark_aval = -1; } action save_attr { if(!S.xml && !akey.isNil()) { akey = akey.callMethod(runtime.getCurrentContext(), "downcase"); } ATTR(akey, aval); } include hpricot_common "hpricot_common.rl"; }%% %% write data nofinal; public final static int BUFSIZE = 16384; private int cs, act, have 
= 0, nread = 0, curline = 1; private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0; private byte[] data; private State S = null; private IRubyObject port, opts, attr, tag, akey, aval, bufsize; private int mark_tag = -1, mark_akey = -1, mark_aval = -1; private boolean done = false, ele_open = false, taint = false, io = false, text = false; private int buffer_size = 0; private Extra x; private IRubyObject self; private Ruby runtime; private ThreadContext ctx; private Block block; private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins; private RaiseException newRaiseException(RubyClass exceptionClass, String message) { return new RaiseException(runtime, exceptionClass, message, true); } public Scanner(IRubyObject self, IRubyObject[] args, Block block) { this.self = self; this.runtime = self.getRuntime(); this.ctx = runtime.getCurrentContext(); this.block = block; attr = runtime.getNil(); tag = runtime.getNil(); akey = runtime.getNil(); aval = runtime.getNil(); bufsize = runtime.getNil(); this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct(); this.xmldecl = x.sym_xmldecl; this.doctype = x.sym_doctype; this.stag = x.sym_stag; this.etag = x.sym_etag; this.emptytag = x.sym_emptytag; this.comment = x.sym_comment; this.cdata = x.sym_cdata; this.procins = x.sym_procins; port = args[0]; if(args.length == 2) { opts = args[1]; } else { opts = runtime.getNil(); } taint = port.isTaint(); io = port.respondsTo("read"); if(!io) { if(port.respondsTo("to_str")) { port = port.callMethod(ctx, "to_str"); port = port.convertToString(); } else { throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)"); } } if(!(opts instanceof RubyHash)) { opts = runtime.getNil(); } if(!block.isGiven()) { S = new State(); S.doc = x.cDoc.allocate(); S.focus = S.doc; S.last = runtime.getNil(); S.xml = OPT(opts, "xml"); S.strict = OPT(opts, "xhtml_strict"); S.fixup = OPT(opts, "fixup_tags"); if(S.strict) 
{ S.fixup = true; } S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts); S.EC = x.mHpricot.getConstant("ElementContent"); } buffer_size = BUFSIZE; if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) { bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size"); if(!bufsize.isNil()) { buffer_size = RubyNumeric.fix2int(bufsize); } } if(io) { buf = 0; data = new byte[buffer_size]; } } private int len, space; // hpricot_scan public IRubyObject scan() { %% write init; while(!done) { p = pe = len = buf; space = buffer_size - have; if(io) { if(space == 0) { /* We've used up the entire buffer storing an already-parsed token * prefix that must be preserved. Likely caused by super-long attributes. * Increase buffer size and continue */ buffer_size += BUFSIZE; data = realloc(data, buffer_size); space = buffer_size - have; } p = have; IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space)); ByteList bl = str.convertToString().getByteList(); len = bl.realSize; System.arraycopy(bl.bytes, bl.begin, data, p, len); } else { ByteList bl = port.convertToString().getByteList(); data = bl.bytes; buf = bl.begin; p = bl.begin; len = bl.realSize + 1; if(p + len >= data.length) { data = new byte[len]; System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize); p = 0; buf = 0; } done = true; eof = p + len; } nread += len; /* If this is the last buffer, tack on an EOF. 
*/ if(io && len < space) { data[p + len++] = 0; eof = p + len; done = true; } pe = p + len; %% write exec; if(cs == hpricot_scan_error) { if(!tag.isNil()) { throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY); } else { throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY); } } if(done && ele_open) { ele_open = false; if(ts > 0) { mark_tag = ts; ts = 0; text = true; } } if(ts == -1) { have = 0; if(mark_tag != -1 && text) { if(done) { if(mark_tag < p - 1) { tag = CAT(tag, mark_tag, p-1); ELE(x.sym_text); } } else { tag = CAT(tag, mark_tag, p); } } if(io) { mark_tag = 0; } else { mark_tag = ((RubyString)port).getByteList().begin; } } else if(io) { have = pe - ts; System.arraycopy(data, ts, data, buf, have); mark_tag = SLIDE(mark_tag); mark_akey = SLIDE(mark_akey); mark_aval = SLIDE(mark_aval); te -= ts; ts = 0; } } if(S != null) { return S.doc; } return runtime.getNil(); } } public static class HpricotModule { // hpricot_scan @JRubyMethod(module = true, optional = 1, required = 1, frame = true) public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) { return new Scanner(self, args, block).scan(); } // hpricot_css @JRubyMethod(module = true) public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) { return new HpricotCss(self, mod, str, node).scan(); } } public static class CData { @JRubyMethod public static IRubyObject content(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod(name = "content=") public static IRubyObject content_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_name(self, value); } } public static class Comment { @JRubyMethod public static IRubyObject content(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod(name = "content=") public static IRubyObject content_set(IRubyObject self, 
IRubyObject value) { return hpricot_ele_set_name(self, value); } } public static class DocType { @JRubyMethod public static IRubyObject raw_string(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_name(self); } @JRubyMethod public static IRubyObject target(IRubyObject self) { return hpricot_ele_get_target(self); } @JRubyMethod(name = "target=") public static IRubyObject target_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_target(self, value); } @JRubyMethod public static IRubyObject public_id(IRubyObject self) { return hpricot_ele_get_public_id(self); } @JRubyMethod(name = "public_id=") public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_public_id(self, value); } @JRubyMethod public static IRubyObject system_id(IRubyObject self) { return hpricot_ele_get_system_id(self); } @JRubyMethod(name = "system_id=") public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_system_id(self, value); } } public static class Elem { @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_raw(self); } } public static class BogusETag { @JRubyMethod public static IRubyObject raw_string(IRubyObject self) { return hpricot_ele_get_attr(self); } @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_attr(self); } } public static class Text { @JRubyMethod public static IRubyObject raw_string(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_name(self); } @JRubyMethod public static IRubyObject content(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod(name = "content=") public static IRubyObject content_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_name(self, value); 
} } public static class XMLDecl { @JRubyMethod public static IRubyObject raw_string(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_name(self); } @JRubyMethod public static IRubyObject encoding(IRubyObject self) { return hpricot_ele_get_encoding(self); } @JRubyMethod(name = "encoding=") public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_encoding(self, value); } @JRubyMethod public static IRubyObject standalone(IRubyObject self) { return hpricot_ele_get_standalone(self); } @JRubyMethod(name = "standalone=") public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_standalone(self, value); } @JRubyMethod public static IRubyObject version(IRubyObject self) { return hpricot_ele_get_version(self); } @JRubyMethod(name = "version=") public static IRubyObject version_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_version(self, value); } } public static class ProcIns { @JRubyMethod public static IRubyObject target(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod(name = "target=") public static IRubyObject target_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_name(self, value); } @JRubyMethod public static IRubyObject content(IRubyObject self) { return hpricot_ele_get_attr(self); } @JRubyMethod(name = "content=") public static IRubyObject content_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_attr(self, value); } } public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. 
So sorry!"; public final static int H_ELE_TAG = 0; public final static int H_ELE_PARENT = 1; public final static int H_ELE_ATTR = 2; public final static int H_ELE_ETAG = 3; public final static int H_ELE_RAW = 4; public final static int H_ELE_EC = 5; public final static int H_ELE_HASH = 6; public final static int H_ELE_CHILDREN = 7; public static IRubyObject H_ELE_GET(IRubyObject recv, int n) { return ((IRubyObject[])recv.dataGetStruct())[n]; } public static RubyHash H_ELE_GET_asHash(IRubyObject recv, int n) { IRubyObject obj = ((IRubyObject[])recv.dataGetStruct())[n]; if(obj.isNil()) { obj = RubyHash.newHash(recv.getRuntime()); ((IRubyObject[])recv.dataGetStruct())[n] = obj; } return (RubyHash)obj; } public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) { ((IRubyObject[])recv.dataGetStruct())[n] = value; return value; } private static class RefCallback implements Callback { private final int n; public RefCallback(int n) { this.n = n; } public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) { return H_ELE_GET(recv, n); } public Arity getArity() { return Arity.NO_ARGUMENTS; } } private static class SetCallback implements Callback { private final int n; public SetCallback(int n) { this.n = n; } public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) { return H_ELE_SET(recv, n, args[0]); } public Arity getArity() { return Arity.ONE_ARGUMENT; } } private final static Callback[] ref_func = new Callback[]{ new RefCallback(0), new RefCallback(1), new RefCallback(2), new RefCallback(3), new RefCallback(4), new RefCallback(5), new RefCallback(6), new RefCallback(7), new RefCallback(8), new RefCallback(9)}; private final static Callback[] set_func = new Callback[]{ new SetCallback(0), new SetCallback(1), new SetCallback(2), new SetCallback(3), new SetCallback(4), new SetCallback(5), new SetCallback(6), new SetCallback(7), new SetCallback(8), new SetCallback(9)}; public final static ObjectAllocator 
alloc_hpricot_struct = new ObjectAllocator() { // alloc_hpricot_struct public IRubyObject allocate(Ruby runtime, RubyClass klass) { RubyClass kurrent = klass; Object sz = kurrent.fastGetInternalVariable("__size__"); while(sz == null && kurrent != null) { kurrent = kurrent.getSuperClass(); sz = kurrent.fastGetInternalVariable("__size__"); } int size = RubyNumeric.fix2int((RubyObject)sz); RubyObject obj = new RubyObject(runtime, klass); IRubyObject[] all = new IRubyObject[size]; java.util.Arrays.fill(all, runtime.getNil()); obj.dataWrapStruct(all); return obj; } }; public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) { RubyClass klass = RubyClass.newClass(runtime, runtime.getObject()); klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length)); klass.setAllocator(alloc_hpricot_struct); for(int i = 0; i < members.length; i++) { String id = members[i].toString(); klass.defineMethod(id, ref_func[i]); klass.defineMethod(id + "=", set_func[i]); } return klass; } public boolean basicLoad(final Ruby runtime) throws IOException { Init_hpricot_scan(runtime); return true; } public static class Extra { IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_allowed, sym_children, sym_comment, sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno, sym_text, sym_EMPTY, sym_CDATA; public RubyModule mHpricot; public RubyClass structElem; public RubyClass structAttr; public RubyClass structBasic; public RubyClass cDoc; public RubyClass cCData; public RubyClass cComment; public RubyClass cDocType; public RubyClass cElem; public RubyClass cBogusETag; public RubyClass cText; public RubyClass cXMLDecl; public RubyClass cProcIns; public RubyClass rb_eHpricotParseError; public IRubyObject reProcInsParse; public Extra(Ruby runtime) { symAllow = runtime.newSymbol("allow"); symDeny = runtime.newSymbol("deny"); sym_xmldecl = runtime.newSymbol("xmldecl"); sym_doctype = 
runtime.newSymbol("doctype"); sym_procins = runtime.newSymbol("procins"); sym_stag = runtime.newSymbol("stag"); sym_etag = runtime.newSymbol("etag"); sym_emptytag = runtime.newSymbol("emptytag"); sym_allowed = runtime.newSymbol("allowed"); sym_children = runtime.newSymbol("children"); sym_comment = runtime.newSymbol("comment"); sym_cdata = runtime.newSymbol("cdata"); sym_name = runtime.newSymbol("name"); sym_parent = runtime.newSymbol("parent"); sym_raw_attributes = runtime.newSymbol("raw_attributes"); sym_raw_string = runtime.newSymbol("raw_string"); sym_tagno = runtime.newSymbol("tagno"); sym_text = runtime.newSymbol("text"); sym_EMPTY = runtime.newSymbol("EMPTY"); sym_CDATA = runtime.newSymbol("CDATA"); } } public static void Init_hpricot_scan(Ruby runtime) { Extra x = new Extra(runtime); x.mHpricot = runtime.defineModule("Hpricot"); x.mHpricot.dataWrapStruct(x); x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")}); x.mHpricot.defineAnnotatedMethods(HpricotModule.class); x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator()); x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children}); x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes}); x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent}); x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator()); x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator()); x.cCData.defineAnnotatedMethods(CData.class); x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator()); x.cComment.defineAnnotatedMethods(Comment.class); x.cDocType = 
x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator()); x.cDocType.defineAnnotatedMethods(DocType.class); x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator()); x.cElem.defineAnnotatedMethods(Elem.class); x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator()); x.cBogusETag.defineAnnotatedMethods(BogusETag.class); x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator()); x.cText.defineAnnotatedMethods(Text.class); x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator()); x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class); x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator()); x.cProcIns.defineAnnotatedMethods(ProcIns.class); x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m"); x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse); } } hpricot-0.8.6/ext/hpricot_scan/hpricot_css.rl0000644000175000017500000000611011710073440020701 0ustar boutilboutil/* * hpricot_css.rl * ragel -C hpricot_css.rl -o hpricot_css.c * * Copyright (C) 2008 why the lucky stiff */ #include #define FILTER(id) \ rb_funcall2(mod, rb_intern("" # id), fargs, fvals); \ rb_ary_clear(tmpt); \ fargs = 1 #define FILTERAUTO() \ char filt[10]; \ sprintf(filt, "%.*s", te - ts, ts); \ rb_funcall2(mod, rb_intern(filt), fargs, fvals); \ rb_ary_clear(tmpt); \ fargs = 1 #ifdef HAVE_RUBY_ENCODING_H #define STRNEW(a, len) rb_external_str_new((a), (len)) #else #define STRNEW(a, len) rb_str_new((a), (len)) #endif #define PUSH(aps, ape) rb_ary_push(tmpt, fvals[fargs++] = STRNEW(aps, ape - aps)) #define P(id) printf(id ": %.*s\n", te - ts, ts); %%{ machine hpricot_css; action a { aps = p; } action b { ape = p; PUSH(aps, ape); } action c { ape = p; aps2 = p; } action d { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } commas = space* "," space*; traverse = [>+~]; sdot = "\\."; utfw = 
alnum | "_" | "-" | (0xc4 0xa8..0xbf) | (0xc5..0xdf 0x80..0xbf) | (0xe0..0xef 0x80..0xbf 0x80..0xbf) | (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf); utfword = utfw+; utfname = (utfw | sdot)+; quote1 = "'" [^']* "'"; quote2 = '"' [^"]* '"'; cssid = "#" %a utfname %b; cssclass = "." %a utfname %b; cssname = "[name=" %a utfname %b "]"; cssattr = "[" %a utfname %c space* [^ \n\t]? "=" %d space* (quote1 | quote2 | [^\]]+) "]"; csstag = utfname >a %b; cssmod = ("even" | "odd" | (digit | "n" | "+" | "-")* ); csschild = ":" %a ("only" | "nth" | "last" | "first") "-child" %b ("(" %a cssmod %b ")")?; csspos = ":" %a ("nth" | "eq" | "gt" | "lt" | "first" | "last" | "even" | "odd") %b ("(" %a digit+ %b ")")?; pseudop = "(" [^)]+ ")"; pseudoq = "'" (pseudop+ | [^'()]*) "'" | '"' (pseudop+ | [^"()]*) '"' | (pseudop+ | [^"()]*); pseudo = ":" %a utfname %b ("(" %a pseudoq %b ")")?; main := |* cssid => { FILTER(ID); }; cssclass => { FILTER(CLASS); }; cssname => { FILTER(NAME); }; cssattr => { FILTER(ATTR); }; csstag => { FILTER(TAG); }; cssmod => { FILTER(MOD); }; csschild => { FILTER(CHILD); }; csspos => { FILTER(POS); }; pseudo => { FILTER(PSUEDO); }; commas => { focus = rb_ary_new3(1, node); }; traverse => { FILTERAUTO(); }; space; *|; write data nofinal; }%% VALUE hpricot_css(VALUE self, VALUE mod, VALUE str, VALUE node) { int cs, act, eof; char *p, *pe, *ts, *te, *aps, *ape, *aps2, *ape2; int fargs = 1; VALUE fvals[6]; VALUE focus = rb_ary_new3(1, node); VALUE tmpt = rb_ary_new(); rb_gc_register_address(&focus); rb_gc_register_address(&tmpt); fvals[0] = focus; if (TYPE(str) != T_STRING) rb_raise(rb_eArgError, "bad CSS selector, String only please."); StringValue(str); p = RSTRING_PTR(str); pe = p + RSTRING_LEN(str); %% write init; %% write exec; rb_gc_unregister_address(&focus); rb_gc_unregister_address(&tmpt); return focus; } hpricot-0.8.6/ext/hpricot_scan/hpricot_css.java.rl0000644000175000017500000001046611710073440021632 0ustar boutilboutilimport java.io.IOException; 
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyHash;
import org.jruby.RubyModule;
import org.jruby.RubyNumeric;
import org.jruby.RubyObject;
import org.jruby.RubyObjectAdapter;
import org.jruby.RubyRegexp;
import org.jruby.RubyString;
import org.jruby.anno.JRubyMethod;
import org.jruby.exceptions.RaiseException;
import org.jruby.javasupport.JavaEmbedUtils;
import org.jruby.runtime.Arity;
import org.jruby.runtime.Block;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.callback.Callback;
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.load.BasicLibraryService;
import org.jruby.util.ByteList;

/**
 * Host class for the Ragel machine <code>hpricot_css</code>: scans the bytes
 * of a CSS selector and calls one filter method on <code>mod</code> per
 * recognised token.  An instance holds the scanner state for a single
 * selector; {@link #scan()} runs the machine.
 */
public class HpricotCss {
    /**
     * Calls the method named <code>id</code> on <code>mod</code> with the
     * first <code>fargs</code> entries of <code>fvals</code>, then clears
     * <code>tmpt</code> and resets <code>fargs</code> to 1.
     */
    public void FILTER(String id) {
        IRubyObject[] args = new IRubyObject[fargs];
        System.arraycopy(fvals, 0, args, 0, fargs);
        mod.callMethod(ctx, id, args);
        tmpt.rb_clear();
        fargs = 1;
    }

    /**
     * Same as {@link #FILTER(String)}, with the method name taken from the
     * current token bytes <code>data[ts, te)</code> decoded as ISO-8859-1.
     * An UnsupportedEncodingException is swallowed, in which case no filter
     * is called.
     */
    public void FILTERAUTO() {
        try {
            FILTER(new String(data, ts, te - ts, "ISO-8859-1"));
        } catch(java.io.UnsupportedEncodingException e) {}
    }

    /**
     * Captures <code>data[aps, ape)</code> as a new RubyString, stores it in
     * the next free <code>fvals</code> slot and appends it to
     * <code>tmpt</code>.
     */
    public void PUSH(int aps, int ape) {
        RubyString str = RubyString.newString(runtime, data, aps, ape-aps);
        fvals[fargs++] = str;
        tmpt.append(str);
    }

    private IRubyObject self, mod, str, node;
    // cs/act/eof/p/pe/ts/te are the variables the generated scanner works on;
    // aps/ape and aps2/ape2 delimit the captures recorded by actions a-d.
    private int cs, act, eof, p, pe, ts, te, aps, ape, aps2, ape2;
    private byte[] data;

    // number of live entries in fvals; slot 0 is set to focus in the constructor
    private int fargs = 1;
    private IRubyObject[] fvals = new IRubyObject[6];
    private RubyArray focus;
    private RubyArray tmpt;
    private Ruby runtime;
    private ThreadContext ctx;

    /**
     * Prepares a scan of <code>str</code>.
     *
     * @param self receiver; only used to obtain the runtime
     * @param mod  object that receives the filter method calls
     * @param str  the selector; must be a RubyString
     * @param node starting node; the initial focus is the array [node]
     * @throws RaiseException (Ruby ArgumentError) when <code>str</code> is not
     *         a RubyString
     */
    public HpricotCss(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
        this.self = self;
        this.mod = mod;
        this.str = str;
        this.node = node;
        this.runtime = self.getRuntime();
        this.ctx = runtime.getCurrentContext();
        this.focus = RubyArray.newArray(runtime, node);
        this.tmpt = runtime.newArray();

        fvals[0] = focus;

        if(!(str instanceof RubyString)) {
            throw runtime.newArgumentError("bad CSS selector, String only please.");
        }

        // Scan the string's backing bytes in place; the whole selector is one
        // block, so eof is the same position as pe.
        ByteList bl = ((RubyString)str).getByteList();

        data = bl.bytes;
        p = bl.begin;
        pe = p + bl.realSize;
        eof = pe;
    }

%%{
  machine hpricot_css;

  action a {
    aps = p;
  }

  action b {
    ape = p;
    PUSH(aps, ape);
  }

  action c {
    ape = p;
    aps2 = p;
  }

  action d {
    ape2 = p;
    PUSH(aps, ape);
    PUSH(aps2, ape2);
  }

  commas    = space* "," space*;
  traverse  = [>+~];
  sdot      = "\\.";
  utfw      = alnum | "_" | "-" |
              (0xc4 0xa8..0xbf) | (0xc5..0xdf 0x80..0xbf) |
              (0xe0..0xef 0x80..0xbf 0x80..0xbf) |
              (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
  utfword   = utfw+;
  utfname   = (utfw | sdot)+;
  quote1    = "'" [^']* "'";
  quote2    = '"' [^"]* '"';

  cssid     = "#" %a utfname %b;
  cssclass  = "." %a utfname %b;
  cssname   = "[name=" %a utfname %b "]";
  cssattr   = "[" %a utfname %c space* [^ \n\t]? "=" %d space* (quote1 | quote2 | [^\]]+) "]";
  csstag    = utfname >a %b;
  cssmod    = ("even" | "odd" | (digit | "n" | "+" | "-")* );
  csschild  = ":" %a ("only" | "nth" | "last" | "first") "-child" %b ("(" %a cssmod %b ")")?;
  csspos    = ":" %a ("nth" | "eq" | "gt" | "lt" | "first" | "last" | "even" | "odd") %b ("(" %a digit+ %b ")")?;
  pseudop   = "(" [^)]+ ")";
  pseudoq   = "'" (pseudop+ | [^'()]*) "'" |
              '"' (pseudop+ | [^"()]*) '"' |
                  (pseudop+ | [^"()]*);
  pseudo    = ":" %a utfname %b ("(" %a pseudoq %b ")")?;

  main := |*
    cssid     => { FILTER("ID"); };
    cssclass  => { FILTER("CLASS"); };
    cssname   => { FILTER("NAME"); };
    cssattr   => { FILTER("ATTR"); };
    csstag    => { FILTER("TAG"); };
    cssmod    => { FILTER("MOD"); };
    csschild  => { FILTER("CHILD"); };
    csspos    => { FILTER("POS"); };
    pseudo    => { FILTER("PSUEDO"); };
    commas    => { focus = RubyArray.newArray(runtime, node); };
    traverse  => { FILTERAUTO(); };
    space;
  *|;

  write data nofinal;
}%%

    /**
     * Runs the scanner over the selector handed to the constructor.
     *
     * @return the <code>focus</code> array
     */
    public IRubyObject scan() {
        %% write init;
        %% write exec;

        return focus;
    }
}
hpricot-0.8.6/ext/hpricot_scan/hpricot_common.rl0000644000175000017500000000543011710073440021405 0ustar boutilboutil%%{ machine hpricot_common; # # HTML tokens # (a blatant rip from HTree) # newline = '\n' @{curline += 1;} ; NameChar = [\-A-Za-z0-9._:?]
;
  Name = [A-Za-z_:] NameChar* ;
  # The four comment/CDATA delimiters below had been lost from this copy;
  # html_comment and html_cdata reference EndComment/EndCdata and consume
  # three characters for each (EBLK(..., 3)).
  StartComment = "<!--" ;
  EndComment = "-->" ;
  StartCdata = "<![CDATA[" ;
  EndCdata = "]]>" ;

  NameCap = Name >_tag %tag;
  NameAttr = NameChar+ >_akey %akey ;
  Q1Char = [^'] ;
  Q1Attr = Q1Char* >_aval %aval ;
  Q2Char = [^"] ;
  Q2Attr = Q2Char* >_aval %aval ;
  UnqAttr = ( space >_aval | [^ \t\r\n<>"'] >_aval [^ \t\r\n<>]* %aunq ) ;
  Nmtoken = NameChar+ >_akey %akey ;

  Attr =  NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ;
  AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ;
  AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ;
  StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">";
  EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ;

  EndTag = "</" NameCap space* ">" ;
  XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ;
  XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ;
  XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ;
  XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ;
  XmlYesNo = ("yes" | "no") >_aval %xmlsd ;
  XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ;
  # NOTE(review): XmlDecl was reconstructed from the three sub-rules above,
  # which nothing else uses; confirm order and optionality against upstream.
  XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?"? ">" ;

  SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ;
  PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*  >_aval %pubid '"' |
    "'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ;
  ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ;
  DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ;
  StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ;
  EndXmlProcIns = "?"? ">" ;

  html_comment := |*
    EndComment @{ EBLK(comment, 3); fgoto main; };
    any | newline { TEXT_PASS(); };
  *|;

  html_cdata := |*
    EndCdata @{ EBLK(cdata, 3); fgoto main; };
    any | newline { TEXT_PASS(); };
  *|;

  html_procins := |*
    EndXmlProcIns @{ EBLK(procins, 2); fgoto main; };
    any | newline { TEXT_PASS(); };
  *|;

  main := |*
    XmlDecl >newEle { ELE(xmldecl); };
    DocType >newEle { ELE(doctype); };
    StartXmlProcIns >newEle { fgoto html_procins; };
    StartTag >newEle { ELE(stag); };
    EndTag >newEle { ELE(etag); };
    EmptyTag >newEle { ELE(emptytag); };
    StartComment >newEle { fgoto html_comment; };
    StartCdata >newEle { fgoto html_cdata; };
    any | newline { TEXT_PASS(); };
  *|;
}%%;
hpricot-0.8.6/ext/hpricot_scan/extconf.rb0000644000175000017500000000026411710073440020021 0ustar boutilboutil
require 'mkmf'

$CFLAGS << " #{ENV["CFLAGS"]}"
CONFIG['optflags'] = '' if $CFLAGS =~ /DEBUG/
dir_config("hpricot_scan")
have_library("c", "main")
create_makefile("hpricot_scan")
hpricot-0.8.6/ext/hpricot_scan/hpricot_scan.c0000644000175000017500000043341111710073440020652 0ustar boutilboutil

#line 1 "hpricot_scan.rl"
/*
 * hpricot_scan.rl
 *
 * $Author: why $
 * $Date: 2006-05-08 22:03:50 -0600 (Mon, 08 May 2006) $
 *
 * Copyright (C) 2006, 2010 why the lucky stiff
 */
#include <ruby.h>
/* NOTE(review): the name of this second header was missing from this copy;
 * <assert.h> is assumed -- confirm against upstream. */
#include <assert.h>

struct hpricot_struct {
  int len;
  VALUE* ptr;
};

#ifndef RARRAY_LEN
#define RARRAY_LEN(arr)  RARRAY(arr)->len
#define RSTRING_LEN(str) RSTRING(str)->len
#define RSTRING_PTR(str) RSTRING(str)->ptr
#endif

/* Defined in hpricot_css.rl with four parameters: self, mod, str, node. */
VALUE hpricot_css(VALUE, VALUE, VALUE, VALUE);

#define NO_WAY_SERIOUSLY "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues.  So sorry!"
static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
      sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno,
      sym_allowed, sym_text, sym_children, sym_EMPTY, sym_CDATA;
static VALUE mHpricot, rb_eHpricotParseError;
static VALUE cBogusETag, cCData, cComment, cDoc, cDocType, cElem, cText,
      cXMLDecl, cProcIns, symAllow, symDeny;
static ID s_ElementContent;
static ID s_downcase, s_new, s_parent, s_read, s_to_str;
static VALUE reProcInsParse;

/* Slot indices into the VALUE array behind every Hpricot node. */
#define H_ELE_TAG      0
#define H_ELE_PARENT   1
#define H_ELE_ATTR     2
#define H_ELE_ETAG     3
#define H_ELE_RAW      4
#define H_ELE_EC       5
#define H_ELE_HASH     6
#define H_ELE_CHILDREN 7

#define HSTRUCT_PTR(ele) ((struct hpricot_struct*)DATA_PTR(ele))->ptr
#define H_ELE_GET(ele, idx)      HSTRUCT_PTR(ele)[idx]
#define H_ELE_SET(ele, idx, val) HSTRUCT_PTR(ele)[idx] = val

/* True when `opts` is non-nil and opts[:key] is truthy. */
#define OPT(opts, key) (!NIL_P(opts) && RTEST(rb_hash_aref(opts, ID2SYM(rb_intern("" # key)))))

#ifdef HAVE_RUBY_ENCODING_H
#include <ruby/encoding.h>
# define ASSOCIATE_INDEX(s)  rb_enc_associate_index((s), encoding_index)
# define ENCODING_INDEX , encoding_index
#else
# define ASSOCIATE_INDEX(s)
# define ENCODING_INDEX
#endif

/*
 * ELE(N): emit the token of type N for the span ts..te.  With a block the
 * token is yielded through rb_yield_tokens, otherwise it is handed to
 * rb_hpricot_token.
 * NOTE(review): raw_string is built in the block branch but Qnil is what gets
 * passed on -- confirm whether that is intentional.
 */
#define ELE(N) \
  if (te > ts || text == 1) { \
    char *raw = NULL; \
    int rawlen = 0; \
    ele_open = 0; text = 0; \
    if (ts != 0 && sym_##N != sym_cdata && sym_##N != sym_text && sym_##N != sym_procins && sym_##N != sym_comment) { \
      raw = ts; rawlen = te - ts; \
    } \
    if (rb_block_given_p()) { \
      VALUE raw_string = Qnil; \
      if (raw != NULL) { \
        raw_string = rb_str_new(raw, rawlen); \
        ASSOCIATE_INDEX(raw_string); \
      } \
      rb_yield_tokens(sym_##N, tag, attr, Qnil, taint); \
    } else \
      rb_hpricot_token(S, sym_##N, tag, attr, raw, rawlen, taint ENCODING_INDEX); \
  }

/* SET(N, E): N = the text from mark_N up to E ("" when the span is empty). */
#define SET(N, E) \
  if (mark_##N == NULL || E == mark_##N) { \
    N = rb_str_new2(""); \
    ASSOCIATE_INDEX(N); \
  } else if (E > mark_##N) { \
    N = rb_str_new(mark_##N, E - mark_##N); \
    ASSOCIATE_INDEX(N); \
  }

/* CAT(N, E): append mark_N..E to N, creating N first when it is nil. */
#define CAT(N, E) if (NIL_P(N)) { SET(N, E); } else { rb_str_cat(N, mark_##N, E - mark_##N); }

/* SLIDE(N): re-base mark_N after the token start has been moved to `buf`. */
#define SLIDE(N) if (mark_##N > ts) mark_##N = buf + (mark_##N - ts);

/* ATTR(K, V): attr[K] = V, creating the attr hash on first use. */
#define ATTR(K, V) \
  if (!NIL_P(K)) { \
    if (NIL_P(attr)) attr = rb_hash_new(); \
    rb_hash_aset(attr, K, V); \
  }

/* TEXT_PASS(): enter text mode and remember where the text run begins. */
#define TEXT_PASS() \
  if (text == 0) \
  { \
    if (ele_open == 1) { \
      ele_open = 0; \
      if (ts > 0) { \
        mark_tag = ts; \
      } \
    } else { \
      mark_tag = p; \
    } \
    attr = Qnil; \
    tag = Qnil; \
    text = 1; \
  }

/* EBLK(N, T): close a comment/cdata/procins block; T is the terminator length. */
#define EBLK(N, T) CAT(tag, p - T + 1); ELE(N);


#line 176 "hpricot_scan.rl"



#line 126 "hpricot_scan.c"
static const int hpricot_scan_start = 198;
static const int hpricot_scan_error = -1;

static const int hpricot_scan_en_html_comment = 208;
static const int hpricot_scan_en_html_cdata = 210;
static const int hpricot_scan_en_html_procins = 212;
static const int hpricot_scan_en_main = 198;


#line 179 "hpricot_scan.rl"

#define BUFSIZE 16384

/* Taint `v` only when it is a heap object: nil and other immediates carry no
 * flags word, and this function is routinely handed nil for attr and raw. */
#define H_TAINT(v) do { if (!SPECIAL_CONST_P(v)) OBJ_TAINT(v); } while (0)

/*
 * Yield one token to the caller's block as the array [sym, tag, attr, raw].
 * For text tokens `raw` is replaced by `tag`.  When `taint` is non-zero the
 * array and each non-immediate member are marked tainted.
 */
void rb_yield_tokens(VALUE sym, VALUE tag, VALUE attr, VALUE raw, int taint)
{
  VALUE ary;
  if (sym == sym_text) {
    raw = tag;
  }
  ary = rb_ary_new3(4, sym, tag, attr, raw);
  if (taint) {
    OBJ_TAINT(ary);
    H_TAINT(tag);
    H_TAINT(attr);
    H_TAINT(raw);
  }
  rb_yield(ary);
}

#ifndef RHASH_TBL
/* rb_hash_lookup() is only in Ruby 1.8.7 */
static VALUE
our_rb_hash_lookup(VALUE hash, VALUE key)
{
  VALUE val;

  if (!st_lookup(RHASH(hash)->tbl, key, &val)) {
    return Qnil; /* without Hash#default */
  }

  return val;
}
#define rb_hash_lookup our_rb_hash_lookup
#endif

/* Append `ele` to the children of `focus` (creating the children array on
 * first use) and record `focus` as the parent of `ele`. */
static void
rb_hpricot_add(VALUE focus, VALUE ele)
{
  VALUE children = H_ELE_GET(focus, H_ELE_CHILDREN);
  if (NIL_P(children))
    H_ELE_SET(focus, H_ELE_CHILDREN, (children = rb_ary_new2(1)));
  rb_ary_push(children, ele);
  H_ELE_SET(ele, H_ELE_PARENT, focus);
}

/* Tree-builder state shared between hpricot_scan and rb_hpricot_token. */
typedef struct {
  VALUE doc;
  VALUE focus;
  VALUE last;
  VALUE EC;
  unsigned char xml, strict, fixup;
} hpricot_state;

/* H_PROP(prop, idx): define set/clear/get accessors for node slot `idx`. */
#define H_PROP(prop, idx) \
  static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
    H_ELE_SET(self, idx, x); \
    return self; \
  } \
  static VALUE hpricot_ele_clear_##prop(VALUE self) { \
    H_ELE_SET(self, idx, Qnil); \
    return Qtrue; \
  } \
  static VALUE hpricot_ele_get_##prop(VALUE self) { \
    return H_ELE_GET(self, idx); \
  }

/* H_ATTR(prop): define set/get accessors for key :prop of the attr hash. */
#define H_ATTR(prop) \
  static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
    rb_hash_aset(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop)), x); \
    return self; \
  } \
  static VALUE hpricot_ele_get_##prop(VALUE self) { \
    return rb_hash_aref(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop))); \
  }

H_PROP(name, H_ELE_TAG);
H_PROP(raw, H_ELE_RAW);
H_PROP(parent, H_ELE_PARENT);
H_PROP(attr, H_ELE_ATTR);
H_PROP(etag, H_ELE_ETAG);
H_PROP(children, H_ELE_CHILDREN);
H_ATTR(target);
H_ATTR(encoding);
H_ATTR(version);
H_ATTR(standalone);
H_ATTR(system_id);
H_ATTR(public_id);

/*
 * H_ELE(klass): allocate a node of `klass` into `ele`, fill its slots from
 * the locals of the expanding function (tag, attr, ec, raw, rawlen, sym) and
 * record it as S->last.
 */
#define H_ELE(klass) \
  ele = rb_obj_alloc(klass); \
  if (klass == cElem) { \
    H_ELE_SET(ele, H_ELE_TAG, tag); \
    H_ELE_SET(ele, H_ELE_ATTR, attr); \
    H_ELE_SET(ele, H_ELE_EC, ec); \
    if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_doctype)) { \
      VALUE raw_str = rb_str_new(raw, rawlen); \
      ASSOCIATE_INDEX(raw_str); \
      H_ELE_SET(ele, H_ELE_RAW, raw_str); \
    } \
  } else if (klass == cDocType || klass == cProcIns || klass == cXMLDecl || klass == cBogusETag) { \
    if (klass == cBogusETag) { \
      H_ELE_SET(ele, H_ELE_TAG, tag); \
      if (raw != NULL) { \
        VALUE raw_str = rb_str_new(raw, rawlen); \
        ASSOCIATE_INDEX(raw_str); \
        H_ELE_SET(ele, H_ELE_ATTR, raw_str); \
      } \
    } else { \
      if (klass == cDocType) \
        ATTR(ID2SYM(rb_intern("target")), tag); \
      H_ELE_SET(ele, H_ELE_ATTR, attr); \
      if (klass != cProcIns) { \
        tag = Qnil; \
        if (raw != NULL) { \
          tag = rb_str_new(raw, rawlen); \
          ASSOCIATE_INDEX(tag); \
        } \
      } \
      H_ELE_SET(ele, H_ELE_TAG, tag); \
    } \
  } else { \
    H_ELE_SET(ele, H_ELE_TAG, tag); \
  } \
  S->last = ele

//
// the swift, compact parser logic.  most of the complicated stuff is done
// in the lexer.  this step just pairs up the start and end tags.
//
// rb_hpricot_token: turn one lexer token into a node and attach it to the
// tree held in S.
//
//   S              builder state (doc, focus, last, EC and mode flags)
//   sym            token type, one of the sym_* values
//   tag, attr      token name/text and attribute hash (either may be nil)
//   raw, rawlen    raw source text of the token, or NULL
//   taint          not referenced in this function
//   encoding_index only with HAVE_RUBY_ENCODING_H; used by ASSOCIATE_INDEX
//
// H_ELE() expands in place and reads/writes the locals ele, ec, tag, attr,
// raw, rawlen, sym and S by name.
//
void
rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw, int rawlen, int taint
#ifdef HAVE_RUBY_ENCODING_H
  , int encoding_index
#endif
)
{
  VALUE ele, ec = Qnil;

  //
  // in html mode, fix up start tags incorrectly formed as empty tags
  //
  if (!S->xml) {
    // look up the content rule for this tag in S->EC, retrying with the
    // downcased name when the first lookup finds nothing
    if (sym == sym_emptytag || sym == sym_stag || sym == sym_etag) {
      ec = rb_hash_aref(S->EC, tag);
      if (NIL_P(ec)) {
        tag = rb_funcall(tag, s_downcase, 0);
        ec = rb_hash_aref(S->EC, tag);
      }
    }

    // while the focused element's content rule is CDATA, every markup token
    // except the end tag whose name hash equals the focus's H_ELE_HASH is
    // demoted to plain text built from its raw source
    if (H_ELE_GET(S->focus, H_ELE_EC) == sym_CDATA &&
       (sym != sym_procins && sym != sym_comment && sym != sym_cdata && sym != sym_text) &&
      !(sym == sym_etag && INT2FIX(rb_str_hash(tag)) == H_ELE_GET(S->focus, H_ELE_HASH)))
    {
      sym = sym_text;
      tag = rb_str_new(raw, rawlen);
      ASSOCIATE_INDEX(tag);
    }

    // swap empty tag <-> start tag according to whether the rule is EMPTY
    if (!NIL_P(ec)) {
      if (sym == sym_emptytag) {
        if (ec != sym_EMPTY)
          sym = sym_stag;
      } else if (sym == sym_stag) {
        if (ec == sym_EMPTY)
          sym = sym_emptytag;
      }
    }
  }

  if (sym == sym_emptytag || sym == sym_stag) {
    // the hash of the tag name is stored on the element so that end tags can
    // be matched by comparing VALUEs
    VALUE name = INT2FIX(rb_str_hash(tag));
    H_ELE(cElem);
    H_ELE_SET(ele, H_ELE_HASH, name);

    if (!S->xml) {
      // walk from the focus up to the document to choose the ancestor the
      // new element will be attached to
      VALUE match = Qnil, e = S->focus;
      while (e != S->doc) {

        if (ec == Qnil) {
          // anything can contain unknown elements
          if (match == Qnil)
            match = e;
        } else {
          VALUE hEC = H_ELE_GET(e, H_ELE_EC);

          if (TYPE(hEC) == T_HASH)
          {
            VALUE has = rb_hash_lookup(hEC, name);
            if (has != Qnil) {
              if (has == Qtrue) {
                if (match == Qnil)
                  match = e;
              } else if (has == symAllow) {
                match = S->focus;
              } else if (has == symDeny) {
                match = Qnil;
              }
            }
          } else {
            // Unknown elements can contain anything
            if (match == Qnil)
              match = e;
          }
        }
        e = H_ELE_GET(e, H_ELE_PARENT);
      }

      // no ancestor selected: keep the current focus
      if (match == Qnil)
        match = S->focus;
      S->focus = match;
    }

    rb_hpricot_add(S->focus, ele);

    //
    // in the case of a start tag that should be empty, just
    // skip the step that focuses the element.  focusing moves
    // us deeper into the document.
    //
    if (sym == sym_stag) {
      if (S->xml || ec != sym_EMPTY) {
        S->focus = ele;
        S->last = Qnil;
      }
    }
  } else if (sym == sym_etag) {
    VALUE name, match = Qnil, e = S->focus;
    if (S->strict) {
      // strict mode: an end tag with no entry in S->EC is treated as </div>
      if (NIL_P(rb_hash_aref(S->EC, tag))) {
        tag = rb_str_new2("div");
        ASSOCIATE_INDEX(tag);
      }
    }

    //
    // another optimization will be to improve this very simple
    // O(n) tag search, where n is the depth of the focused tag.
    //
    // (see also: the search above for fixups)
    //
    name = INT2FIX(rb_str_hash(tag));
    while (e != S->doc) {
      if (H_ELE_GET(e, H_ELE_HASH) == name) {
        match = e;
        break;
      }

      e = H_ELE_GET(e, H_ELE_PARENT);
    }

    if (NIL_P(match)) {
      // no open element with this name: keep the tag as a BogusETag node
      H_ELE(cBogusETag);
      rb_hpricot_add(S->focus, ele);
    } else {
      // this inner `ele` shadows the function-level one; it holds only the
      // raw end-tag text stored on the matched element
      VALUE ele = Qnil;
      if (raw != NULL) {
        ele = rb_str_new(raw, rawlen);
        ASSOCIATE_INDEX(ele);
      }
      H_ELE_SET(match, H_ELE_ETAG, ele);
      // closing the element moves the focus back to its parent
      S->focus = H_ELE_GET(match, H_ELE_PARENT);
      S->last = Qnil;
    }
  } else if (sym == sym_cdata) {
    H_ELE(cCData);
    rb_hpricot_add(S->focus, ele);
  } else if (sym == sym_comment) {
    H_ELE(cComment);
    rb_hpricot_add(S->focus, ele);
  } else if (sym == sym_doctype) {
    // H_ELE(cDocType) stores :target through ATTR(), which creates the attr
    // hash when it was nil
    H_ELE(cDocType);
    if (S->strict) {
      // strict mode overwrites the identifiers with the XHTML 1.0 Strict ones
      VALUE id;
      id = rb_str_new2("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
      ASSOCIATE_INDEX(id);
      rb_hash_aset(attr, ID2SYM(rb_intern("system_id")), id);
      id = rb_str_new2("-//W3C//DTD XHTML 1.0 Strict//EN");
      ASSOCIATE_INDEX(id);
      rb_hash_aset(attr, ID2SYM(rb_intern("public_id")), id);
    }
    rb_hpricot_add(S->focus, ele);
  } else if (sym == sym_procins) {
    // split the instruction with reProcInsParse: group 1 becomes the tag,
    // group 2 is stored in the attr slot
    VALUE match = rb_funcall(tag, rb_intern("match"), 1, reProcInsParse);
    tag = rb_reg_nth_match(1, match);
    attr = rb_reg_nth_match(2, match);
    {
        H_ELE(cProcIns);
        rb_hpricot_add(S->focus, ele);
    }
  } else if (sym == sym_text) {
    // TODO: add raw_string as well?
    // text following a Text node (S->last) is appended to it instead of
    // creating a new node
    if (!NIL_P(S->last) && RTEST(rb_obj_is_instance_of(S->last, cText))) {
      rb_str_append(H_ELE_GET(S->last, H_ELE_TAG), tag);
    } else {
      H_ELE(cText);
      rb_hpricot_add(S->focus, ele);
    }
  } else if (sym == sym_xmldecl) {
    H_ELE(cXMLDecl);
    rb_hpricot_add(S->focus, ele);
  }
}

VALUE hpricot_scan(int argc, VALUE *argv, VALUE self) { int cs, act, have = 0, nread = 0, curline = 1, text = 0, io = 0; char *ts = 0, *te = 0, *buf = NULL, *eof = NULL; hpricot_state *S = NULL; VALUE port, opts; VALUE attr = Qnil, tag = Qnil, akey = Qnil, aval = Qnil, bufsize = Qnil; char *mark_tag = 0, *mark_akey = 0, *mark_aval = 0; int done = 0, ele_open = 0, buffer_size = 0, taint = 0; #ifdef HAVE_RUBY_ENCODING_H int encoding_index = rb_enc_to_index(rb_default_external_encoding()); #endif rb_scan_args(argc, argv, "11", &port, &opts); taint = OBJ_TAINTED(port); io = rb_respond_to(port, s_read); if (!io) { if (rb_respond_to(port, s_to_str)) { port = rb_funcall(port, s_to_str, 0); StringValue(port); } else { rb_raise(rb_eArgError, "an Hpricot document must be built from an input source (a String or IO object.)"); } } if (TYPE(opts) != T_HASH) opts = Qnil; if (!rb_block_given_p()) { S = ALLOC(hpricot_state); S->doc = rb_obj_alloc(cDoc); rb_gc_register_address(&S->doc); S->focus = S->doc; S->last = Qnil; S->xml = OPT(opts, xml); S->strict = OPT(opts, xhtml_strict); S->fixup = OPT(opts, fixup_tags); if (S->strict) S->fixup = 1; rb_ivar_set(S->doc, rb_intern("@options"), opts); S->EC = rb_const_get(mHpricot, s_ElementContent); } buffer_size = BUFSIZE; if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) { bufsize = rb_ivar_get(self, rb_intern("@buffer_size")); if (!NIL_P(bufsize)) { buffer_size = NUM2INT(bufsize); } } if (io) buf = ALLOC_N(char, buffer_size); #line 508 "hpricot_scan.c" { cs = hpricot_scan_start; ts = 0; te = 0; act = 0; } #line 550 "hpricot_scan.rl" while (!done) { VALUE str; char *p, *pe; int len, space = buffer_size - have, tokstart_diff, tokend_diff, mark_tag_diff,
mark_akey_diff, mark_aval_diff; if (io) { if (space == 0) { /* We've used up the entire buffer storing an already-parsed token * prefix that must be preserved. Likely caused by super-long attributes. * Increase buffer size and continue */ tokstart_diff = ts - buf; tokend_diff = te - buf; mark_tag_diff = mark_tag - buf; mark_akey_diff = mark_akey - buf; mark_aval_diff = mark_aval - buf; buffer_size += BUFSIZE; REALLOC_N(buf, char, buffer_size); space = buffer_size - have; ts = buf + tokstart_diff; te = buf + tokend_diff; mark_tag = buf + mark_tag_diff; mark_akey = buf + mark_akey_diff; mark_aval = buf + mark_aval_diff; } p = buf + have; str = rb_funcall(port, s_read, 1, INT2FIX(space)); len = RSTRING_LEN(str); memcpy(p, StringValuePtr(str), len); } else { p = RSTRING_PTR(port); len = RSTRING_LEN(port) + 1; done = 1; } nread += len; /* If this is the last buffer, tack on an EOF. */ if (io && len < space) { p[len++] = 0; done = 1; } pe = p + len; #line 569 "hpricot_scan.c" { if ( p == pe ) goto _test_eof; switch ( cs ) { tr0: #line 73 "hpricot_common.rl" {{p = ((te))-1;}{ TEXT_PASS(); }} goto st198; tr4: #line 71 "hpricot_common.rl" {te = p+1;{ {goto st208;} }} goto st198; tr15: #line 136 "hpricot_scan.rl" { SET(tag, p); } #line 66 "hpricot_common.rl" {te = p+1;{ ELE(doctype); }} goto st198; tr18: #line 66 "hpricot_common.rl" {te = p+1;{ ELE(doctype); }} goto st198; tr39: #line 1 "NONE" { switch( act ) { case 8: {{p = ((te))-1;} ELE(doctype); } break; case 10: {{p = ((te))-1;} ELE(stag); } break; case 12: {{p = ((te))-1;} ELE(emptytag); } break; case 15: {{p = ((te))-1;} TEXT_PASS(); } break; } } goto st198; tr93: #line 72 "hpricot_common.rl" {te = p+1;{ {goto st210;} }} goto st198; tr97: #line 136 "hpricot_scan.rl" { SET(tag, p); } #line 69 "hpricot_common.rl" {te = p+1;{ ELE(etag); }} goto st198; tr99: #line 69 "hpricot_common.rl" {te = p+1;{ ELE(etag); }} goto st198; tr103: #line 136 "hpricot_scan.rl" { SET(tag, p); } #line 68 "hpricot_common.rl" {te = p+1;{ 
ELE(stag); }} goto st198; tr107: #line 68 "hpricot_common.rl" {te = p+1;{ ELE(stag); }} goto st198; tr112: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {te = p+1;{ ELE(stag); }} goto st198; tr117: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {te = p+1;{ ELE(stag); }} goto st198; tr118: #line 70 "hpricot_common.rl" {te = p+1;{ ELE(emptytag); }} goto st198; tr129: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {te = p+1;{ ELE(stag); }} goto st198; tr133: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 68 "hpricot_common.rl" {te = p+1;{ ELE(stag); }} goto st198; tr139: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {te = p+1;{ ELE(stag); }} goto st198; tr335: #line 67 "hpricot_common.rl" {{p = ((te))-1;}{ {goto st212;} }} goto st198; tr349: #line 65 "hpricot_common.rl" {te = p+1;{ ELE(xmldecl); }} goto st198; tr397: #line 73 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} goto st198; tr398: #line 9 "hpricot_common.rl" {curline += 1;} #line 73 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} goto st198; tr400: #line 73 "hpricot_common.rl" {te = p;p--;{ TEXT_PASS(); }} goto 
st198; tr405: #line 66 "hpricot_common.rl" {te = p;p--;{ ELE(doctype); }} goto st198; tr406: #line 67 "hpricot_common.rl" {te = p;p--;{ {goto st212;} }} goto st198; st198: #line 1 "NONE" {ts = 0;} if ( ++p == pe ) goto _test_eof198; case 198: #line 1 "NONE" {ts = p;} #line 746 "hpricot_scan.c" switch( (*p) ) { case 10: goto tr398; case 60: goto tr399; } goto tr397; tr399: #line 1 "NONE" {te = p+1;} #line 121 "hpricot_scan.rl" { if (text == 1) { CAT(tag, p); ELE(text); text = 0; } attr = Qnil; tag = Qnil; mark_tag = NULL; ele_open = 1; } #line 73 "hpricot_common.rl" {act = 15;} goto st199; st199: if ( ++p == pe ) goto _test_eof199; case 199: #line 774 "hpricot_scan.c" switch( (*p) ) { case 33: goto st0; case 47: goto st59; case 58: goto tr403; case 63: goto st139; case 95: goto tr403; } if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr403; } else if ( (*p) >= 65 ) goto tr403; goto tr400; st0: if ( ++p == pe ) goto _test_eof0; case 0: switch( (*p) ) { case 45: goto st1; case 68: goto st2; case 91: goto st53; } goto tr0; st1: if ( ++p == pe ) goto _test_eof1; case 1: if ( (*p) == 45 ) goto tr4; goto tr0; st2: if ( ++p == pe ) goto _test_eof2; case 2: if ( (*p) == 79 ) goto st3; goto tr0; st3: if ( ++p == pe ) goto _test_eof3; case 3: if ( (*p) == 67 ) goto st4; goto tr0; st4: if ( ++p == pe ) goto _test_eof4; case 4: if ( (*p) == 84 ) goto st5; goto tr0; st5: if ( ++p == pe ) goto _test_eof5; case 5: if ( (*p) == 89 ) goto st6; goto tr0; st6: if ( ++p == pe ) goto _test_eof6; case 6: if ( (*p) == 80 ) goto st7; goto tr0; st7: if ( ++p == pe ) goto _test_eof7; case 7: if ( (*p) == 69 ) goto st8; goto tr0; st8: if ( ++p == pe ) goto _test_eof8; case 8: if ( (*p) == 32 ) goto st9; if ( 9 <= (*p) && (*p) <= 13 ) goto st9; goto tr0; st9: if ( ++p == pe ) goto _test_eof9; case 9: switch( (*p) ) { case 32: goto st9; case 58: goto tr12; case 95: goto tr12; } if ( (*p) < 65 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st9; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) 
<= 122 ) goto tr12; } else goto tr12; goto tr0; tr12: #line 133 "hpricot_scan.rl" { mark_tag = p; } goto st10; st10: if ( ++p == pe ) goto _test_eof10; case 10: #line 882 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr13; case 62: goto tr15; case 63: goto st10; case 91: goto tr16; case 95: goto st10; } if ( (*p) < 48 ) { if ( (*p) > 13 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st10; } else if ( (*p) >= 9 ) goto tr13; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st10; } else if ( (*p) >= 65 ) goto st10; } else goto st10; goto tr0; tr13: #line 136 "hpricot_scan.rl" { SET(tag, p); } goto st11; st11: if ( ++p == pe ) goto _test_eof11; case 11: #line 913 "hpricot_scan.c" switch( (*p) ) { case 32: goto st11; case 62: goto tr18; case 80: goto st12; case 83: goto st48; case 91: goto st26; } if ( 9 <= (*p) && (*p) <= 13 ) goto st11; goto tr0; st12: if ( ++p == pe ) goto _test_eof12; case 12: if ( (*p) == 85 ) goto st13; goto tr0; st13: if ( ++p == pe ) goto _test_eof13; case 13: if ( (*p) == 66 ) goto st14; goto tr0; st14: if ( ++p == pe ) goto _test_eof14; case 14: if ( (*p) == 76 ) goto st15; goto tr0; st15: if ( ++p == pe ) goto _test_eof15; case 15: if ( (*p) == 73 ) goto st16; goto tr0; st16: if ( ++p == pe ) goto _test_eof16; case 16: if ( (*p) == 67 ) goto st17; goto tr0; st17: if ( ++p == pe ) goto _test_eof17; case 17: if ( (*p) == 32 ) goto st18; if ( 9 <= (*p) && (*p) <= 13 ) goto st18; goto tr0; st18: if ( ++p == pe ) goto _test_eof18; case 18: switch( (*p) ) { case 32: goto st18; case 34: goto st19; case 39: goto st30; } if ( 9 <= (*p) && (*p) <= 13 ) goto st18; goto tr0; st19: if ( ++p == pe ) goto _test_eof19; case 19: switch( (*p) ) { case 9: goto tr30; case 34: goto tr31; case 61: goto tr30; case 95: goto tr30; } if ( (*p) < 39 ) { if ( 32 <= (*p) && (*p) <= 37 ) goto tr30; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr30; } else if ( (*p) >= 63 ) goto tr30; } else goto tr30; 
goto tr0; tr30: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st20; st20: if ( ++p == pe ) goto _test_eof20; case 20: #line 1010 "hpricot_scan.c" switch( (*p) ) { case 9: goto st20; case 34: goto tr33; case 61: goto st20; case 95: goto st20; } if ( (*p) < 39 ) { if ( 32 <= (*p) && (*p) <= 37 ) goto st20; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st20; } else if ( (*p) >= 63 ) goto st20; } else goto st20; goto tr0; tr31: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 158 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); } goto st21; tr33: #line 158 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); } goto st21; st21: if ( ++p == pe ) goto _test_eof21; case 21: #line 1043 "hpricot_scan.c" switch( (*p) ) { case 32: goto st22; case 62: goto tr18; case 91: goto st26; } if ( 9 <= (*p) && (*p) <= 13 ) goto st22; goto tr0; st22: if ( ++p == pe ) goto _test_eof22; case 22: switch( (*p) ) { case 32: goto st22; case 34: goto st23; case 39: goto st28; case 62: goto tr18; case 91: goto st26; } if ( 9 <= (*p) && (*p) <= 13 ) goto st22; goto tr0; st23: if ( ++p == pe ) goto _test_eof23; case 23: if ( (*p) == 34 ) goto tr38; goto tr37; tr37: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st24; st24: if ( ++p == pe ) goto _test_eof24; case 24: #line 1081 "hpricot_scan.c" if ( (*p) == 34 ) goto tr41; goto st24; tr38: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st25; tr41: #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st25; st25: if ( ++p == pe ) goto _test_eof25; case 25: #line 1099 "hpricot_scan.c" switch( (*p) ) { case 32: goto st25; case 62: goto tr18; case 91: goto st26; } if ( 9 <= (*p) && (*p) <= 13 ) goto st25; goto tr39; tr16: #line 136 "hpricot_scan.rl" { SET(tag, p); } goto st26; st26: if ( ++p == pe ) goto 
_test_eof26; case 26: #line 1116 "hpricot_scan.c" if ( (*p) == 93 ) goto st27; goto st26; st27: if ( ++p == pe ) goto _test_eof27; case 27: switch( (*p) ) { case 32: goto st27; case 62: goto tr18; } if ( 9 <= (*p) && (*p) <= 13 ) goto st27; goto tr39; st28: if ( ++p == pe ) goto _test_eof28; case 28: if ( (*p) == 39 ) goto tr38; goto tr44; tr44: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st29; st29: if ( ++p == pe ) goto _test_eof29; case 29: #line 1146 "hpricot_scan.c" if ( (*p) == 39 ) goto tr41; goto st29; st30: if ( ++p == pe ) goto _test_eof30; case 30: switch( (*p) ) { case 9: goto tr46; case 39: goto tr47; case 61: goto tr46; case 95: goto tr46; } if ( (*p) < 40 ) { if ( (*p) > 33 ) { if ( 35 <= (*p) && (*p) <= 37 ) goto tr46; } else if ( (*p) >= 32 ) goto tr46; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr46; } else if ( (*p) >= 63 ) goto tr46; } else goto tr46; goto tr0; tr46: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st31; st31: if ( ++p == pe ) goto _test_eof31; case 31: #line 1183 "hpricot_scan.c" switch( (*p) ) { case 9: goto st31; case 39: goto tr49; case 61: goto st31; case 95: goto st31; } if ( (*p) < 40 ) { if ( (*p) > 33 ) { if ( 35 <= (*p) && (*p) <= 37 ) goto st31; } else if ( (*p) >= 32 ) goto st31; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st31; } else if ( (*p) >= 63 ) goto st31; } else goto st31; goto tr0; tr47: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 158 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); } goto st32; tr49: #line 158 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); } goto st32; tr55: #line 158 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); } #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st32; tr82: #line 158 "hpricot_scan.rl" { 
SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); } #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st32; st32: if ( ++p == pe ) goto _test_eof32; case 32: #line 1233 "hpricot_scan.c" switch( (*p) ) { case 9: goto st33; case 32: goto st33; case 33: goto st31; case 39: goto tr49; case 62: goto tr18; case 91: goto st26; case 95: goto st31; } if ( (*p) < 40 ) { if ( (*p) > 13 ) { if ( 35 <= (*p) && (*p) <= 37 ) goto st31; } else if ( (*p) >= 10 ) goto st22; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st31; } else if ( (*p) >= 61 ) goto st31; } else goto st31; goto tr0; st33: if ( ++p == pe ) goto _test_eof33; case 33: switch( (*p) ) { case 9: goto st33; case 32: goto st33; case 34: goto st23; case 39: goto tr51; case 62: goto tr18; case 91: goto st26; case 95: goto st31; } if ( (*p) < 40 ) { if ( (*p) > 13 ) { if ( 33 <= (*p) && (*p) <= 37 ) goto st31; } else if ( (*p) >= 10 ) goto st22; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st31; } else if ( (*p) >= 61 ) goto st31; } else goto st31; goto tr0; tr51: #line 158 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); } goto st34; tr62: #line 158 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); } #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st34; st34: if ( ++p == pe ) goto _test_eof34; case 34: #line 1300 "hpricot_scan.c" switch( (*p) ) { case 9: goto tr52; case 32: goto tr52; case 33: goto tr54; case 39: goto tr55; case 62: goto tr56; case 91: goto tr57; case 95: goto tr54; } if ( (*p) < 40 ) { if ( (*p) > 13 ) { if ( 35 <= (*p) && (*p) <= 37 ) goto tr54; } else if ( (*p) >= 10 ) goto tr53; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr54; } else if ( (*p) >= 61 ) goto tr54; } else goto tr54; goto tr44; tr52: #line 134 "hpricot_scan.rl" { 
mark_aval = p; } goto st35; st35: if ( ++p == pe ) goto _test_eof35; case 35: #line 1333 "hpricot_scan.c" switch( (*p) ) { case 9: goto st35; case 32: goto st35; case 34: goto st37; case 39: goto tr62; case 62: goto tr63; case 91: goto st40; case 95: goto st47; } if ( (*p) < 40 ) { if ( (*p) > 13 ) { if ( 33 <= (*p) && (*p) <= 37 ) goto st47; } else if ( (*p) >= 10 ) goto st36; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st47; } else if ( (*p) >= 61 ) goto st47; } else goto st47; goto st29; tr53: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st36; st36: if ( ++p == pe ) goto _test_eof36; case 36: #line 1366 "hpricot_scan.c" switch( (*p) ) { case 32: goto st36; case 34: goto st37; case 39: goto tr65; case 62: goto tr63; case 91: goto st40; } if ( 9 <= (*p) && (*p) <= 13 ) goto st36; goto st29; st37: if ( ++p == pe ) goto _test_eof37; case 37: switch( (*p) ) { case 34: goto tr67; case 39: goto tr68; } goto tr66; tr66: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st38; st38: if ( ++p == pe ) goto _test_eof38; case 38: #line 1394 "hpricot_scan.c" switch( (*p) ) { case 34: goto tr70; case 39: goto tr71; } goto st38; tr81: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st39; tr67: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st39; tr70: #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st39; st39: if ( ++p == pe ) goto _test_eof39; case 39: #line 1418 "hpricot_scan.c" switch( (*p) ) { case 32: goto st39; case 39: goto tr41; case 62: goto tr63; case 91: goto st40; } if ( 9 <= (*p) && (*p) <= 13 ) goto st39; goto st29; tr56: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 66 "hpricot_common.rl" {act = 8;} goto st200; tr63: #line 1 "NONE" {te = p+1;} #line 66 "hpricot_common.rl" {act = 8;} goto st200; st200: if ( ++p == pe ) goto _test_eof200; 
case 200: #line 1446 "hpricot_scan.c" if ( (*p) == 39 ) goto tr41; goto st29; tr57: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st40; st40: if ( ++p == pe ) goto _test_eof40; case 40: #line 1458 "hpricot_scan.c" switch( (*p) ) { case 39: goto tr73; case 93: goto st42; } goto st40; tr73: #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st41; st41: if ( ++p == pe ) goto _test_eof41; case 41: #line 1472 "hpricot_scan.c" switch( (*p) ) { case 32: goto st41; case 62: goto tr76; case 93: goto st27; } if ( 9 <= (*p) && (*p) <= 13 ) goto st41; goto st26; tr76: #line 1 "NONE" {te = p+1;} #line 66 "hpricot_common.rl" {act = 8;} goto st201; st201: if ( ++p == pe ) goto _test_eof201; case 201: #line 1491 "hpricot_scan.c" if ( (*p) == 93 ) goto st27; goto st26; st42: if ( ++p == pe ) goto _test_eof42; case 42: switch( (*p) ) { case 32: goto st42; case 39: goto tr41; case 62: goto tr63; } if ( 9 <= (*p) && (*p) <= 13 ) goto st42; goto st29; tr68: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st43; tr71: #line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st43; st43: if ( ++p == pe ) goto _test_eof43; case 43: #line 1521 "hpricot_scan.c" switch( (*p) ) { case 32: goto st43; case 34: goto tr41; case 62: goto tr78; case 91: goto st44; } if ( 9 <= (*p) && (*p) <= 13 ) goto st43; goto st24; tr78: #line 1 "NONE" {te = p+1;} #line 66 "hpricot_common.rl" {act = 8;} goto st202; st202: if ( ++p == pe ) goto _test_eof202; case 202: #line 1541 "hpricot_scan.c" if ( (*p) == 34 ) goto tr41; goto st24; st44: if ( ++p == pe ) goto _test_eof44; case 44: switch( (*p) ) { case 34: goto tr73; case 93: goto st45; } goto st44; st45: if ( ++p == pe ) goto _test_eof45; case 45: switch( (*p) ) { case 32: goto st45; case 34: goto tr41; case 62: goto tr78; } if ( 9 <= (*p) && (*p) <= 13 ) goto st45; goto st24; tr65: 
#line 159 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); } goto st46; st46: if ( ++p == pe ) goto _test_eof46; case 46: #line 1574 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr81; case 39: goto tr38; case 62: goto tr56; case 91: goto tr57; } if ( 9 <= (*p) && (*p) <= 13 ) goto tr81; goto tr44; tr54: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st47; st47: if ( ++p == pe ) goto _test_eof47; case 47: #line 1592 "hpricot_scan.c" switch( (*p) ) { case 9: goto st47; case 39: goto tr82; case 61: goto st47; case 95: goto st47; } if ( (*p) < 40 ) { if ( (*p) > 33 ) { if ( 35 <= (*p) && (*p) <= 37 ) goto st47; } else if ( (*p) >= 32 ) goto st47; } else if ( (*p) > 59 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st47; } else if ( (*p) >= 63 ) goto st47; } else goto st47; goto st29; st48: if ( ++p == pe ) goto _test_eof48; case 48: if ( (*p) == 89 ) goto st49; goto tr0; st49: if ( ++p == pe ) goto _test_eof49; case 49: if ( (*p) == 83 ) goto st50; goto tr0; st50: if ( ++p == pe ) goto _test_eof50; case 50: if ( (*p) == 84 ) goto st51; goto tr0; st51: if ( ++p == pe ) goto _test_eof51; case 51: if ( (*p) == 69 ) goto st52; goto tr0; st52: if ( ++p == pe ) goto _test_eof52; case 52: if ( (*p) == 77 ) goto st21; goto tr0; st53: if ( ++p == pe ) goto _test_eof53; case 53: if ( (*p) == 67 ) goto st54; goto tr0; st54: if ( ++p == pe ) goto _test_eof54; case 54: if ( (*p) == 68 ) goto st55; goto tr0; st55: if ( ++p == pe ) goto _test_eof55; case 55: if ( (*p) == 65 ) goto st56; goto tr0; st56: if ( ++p == pe ) goto _test_eof56; case 56: if ( (*p) == 84 ) goto st57; goto tr0; st57: if ( ++p == pe ) goto _test_eof57; case 57: if ( (*p) == 65 ) goto st58; goto tr0; st58: if ( ++p == pe ) goto _test_eof58; case 58: if ( (*p) == 91 ) goto tr93; goto tr0; st59: if ( ++p == pe ) goto _test_eof59; case 59: switch( (*p) ) { case 58: goto tr94; case 95: goto tr94; } if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr94; } else 
if ( (*p) >= 65 ) goto tr94; goto tr0; tr94: #line 133 "hpricot_scan.rl" { mark_tag = p; } goto st60; st60: if ( ++p == pe ) goto _test_eof60; case 60: #line 1713 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr95; case 62: goto tr97; case 63: goto st60; case 95: goto st60; } if ( (*p) < 48 ) { if ( (*p) > 13 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st60; } else if ( (*p) >= 9 ) goto tr95; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st60; } else if ( (*p) >= 65 ) goto st60; } else goto st60; goto tr0; tr95: #line 136 "hpricot_scan.rl" { SET(tag, p); } goto st61; st61: if ( ++p == pe ) goto _test_eof61; case 61: #line 1743 "hpricot_scan.c" switch( (*p) ) { case 32: goto st61; case 62: goto tr99; } if ( 9 <= (*p) && (*p) <= 13 ) goto st61; goto tr0; tr403: #line 133 "hpricot_scan.rl" { mark_tag = p; } goto st62; st62: if ( ++p == pe ) goto _test_eof62; case 62: #line 1759 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr100; case 47: goto tr102; case 62: goto tr103; case 63: goto st62; case 95: goto st62; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr100; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st62; } else if ( (*p) >= 65 ) goto st62; } else goto st62; goto tr0; tr100: #line 136 "hpricot_scan.rl" { SET(tag, p); } goto st63; st63: if ( ++p == pe ) goto _test_eof63; case 63: #line 1787 "hpricot_scan.c" switch( (*p) ) { case 32: goto st63; case 47: goto st66; case 62: goto tr107; case 63: goto tr105; case 95: goto tr105; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st63; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr105; } else if ( (*p) >= 65 ) goto tr105; } else goto tr105; goto tr0; tr105: #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st64; tr114: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = 
rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st64; st64: if ( ++p == pe ) goto _test_eof64; case 64: #line 1839 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr108; case 47: goto tr110; case 61: goto tr111; case 62: goto tr112; case 63: goto st64; case 95: goto st64; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr108; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st64; } else if ( (*p) >= 65 ) goto st64; } else goto st64; goto tr39; tr108: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st65; tr140: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st65; tr134: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st65; st65: if ( ++p == pe ) goto _test_eof65; case 65: #line 1884 "hpricot_scan.c" switch( (*p) ) { case 32: goto st65; case 47: goto tr115; case 61: goto st67; case 62: goto tr117; case 63: goto tr114; case 95: goto tr114; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st65; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr114; } else if ( (*p) >= 65 ) goto tr114; } else goto tr114; goto tr39; tr102: #line 136 "hpricot_scan.rl" { SET(tag, p); } goto st66; tr110: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st66; tr115: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st66; st66: if ( ++p == pe ) goto _test_eof66; case 66: #line 1931 "hpricot_scan.c" if ( (*p) == 62 ) goto tr118; goto tr39; tr111: #line 143 "hpricot_scan.rl" { SET(akey, 
p); } goto st67; st67: if ( ++p == pe ) goto _test_eof67; case 67: #line 1943 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr120; case 32: goto tr120; case 34: goto st136; case 39: goto st137; case 47: goto tr124; case 60: goto tr39; case 62: goto tr117; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr121; } else if ( (*p) >= 9 ) goto tr120; goto tr119; tr119: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st68; st68: if ( ++p == pe ) goto _test_eof68; case 68: #line 1967 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr126; case 32: goto tr126; case 47: goto tr128; case 60: goto tr39; case 62: goto tr129; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr127; } else if ( (*p) >= 9 ) goto tr126; goto st68; tr126: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st69; tr319: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st69; tr167: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st69; st69: if ( ++p == pe ) goto _test_eof69; case 69: #line 2002 "hpricot_scan.c" switch( (*p) ) { case 32: goto st69; case 47: goto tr115; case 62: goto tr117; case 63: goto tr114; case 95: goto tr114; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st69; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr114; } else if ( (*p) >= 65 ) goto tr114; } else goto tr114; goto tr39; tr127: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st70; tr155: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st70; tr162: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st70; st70: if ( ++p == pe ) goto _test_eof70; case 70: #line 2043 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr126; case 32: goto tr126; case 47: goto tr132; case 60: goto tr39; case 62: goto tr133; case 63: goto tr131; 
case 95: goto tr131; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr127; } else if ( (*p) >= 9 ) goto tr126; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr131; } else if ( (*p) >= 65 ) goto tr131; } else goto tr131; goto st68; tr131: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st71; tr150: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st71; st71: if ( ++p == pe ) goto _test_eof71; case 71: #line 2108 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr134; case 32: goto tr134; case 47: goto tr137; case 60: goto tr39; case 61: goto tr138; case 62: goto tr139; case 63: goto st71; case 95: goto st71; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr135; } else if ( (*p) >= 9 ) goto tr134; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st71; } else if ( (*p) >= 65 ) goto st71; } else goto st71; goto st68; tr141: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st72; tr135: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st72; st72: if ( ++p == pe ) goto _test_eof72; case 72: #line 2154 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr140; case 32: goto tr140; case 47: goto tr132; case 60: goto tr39; case 61: goto st74; case 62: goto tr133; case 63: goto tr131; case 95: goto 
tr131; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr141; } else if ( (*p) >= 9 ) goto tr140; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr131; } else if ( (*p) >= 65 ) goto tr131; } else goto tr131; goto st68; tr124: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st73; tr128: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st73; tr132: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st73; tr137: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st73; tr147: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st73; tr151: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st73; st73: if ( ++p == pe ) goto _test_eof73; case 73: #line 2265 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr126; case 32: goto tr126; case 47: 
goto tr128; case 60: goto tr39; case 62: goto tr129; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr127; } else if ( (*p) >= 9 ) goto tr126; goto st68; tr121: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st74; tr138: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st74; st74: if ( ++p == pe ) goto _test_eof74; case 74: #line 2291 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr143; case 32: goto tr143; case 34: goto st77; case 39: goto st135; case 47: goto tr147; case 60: goto tr39; case 62: goto tr129; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr144; } else if ( (*p) >= 9 ) goto tr143; goto tr119; tr148: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st75; tr143: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st75; st75: if ( ++p == pe ) goto _test_eof75; case 75: #line 2324 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr148; case 32: goto tr148; case 34: goto st136; case 39: goto st137; case 47: goto tr124; case 60: goto tr39; case 62: goto tr117; case 63: goto tr150; case 95: goto tr150; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr149; } else if ( (*p) >= 9 ) goto tr148; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr150; } else if ( (*p) >= 65 ) goto tr150; } else goto tr150; goto tr119; tr149: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st76; tr144: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st76; st76: if ( ++p == pe ) goto _test_eof76; case 76: #line 2368 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr143; case 32: goto tr143; case 34: goto st77; case 39: goto st135; case 47: goto tr151; case 60: goto tr39; case 62: goto tr133; case 63: goto tr150; case 95: goto tr150; } if ( (*p) < 45 ) { if ( 
(*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr144; } else if ( (*p) >= 9 ) goto tr143; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr150; } else if ( (*p) >= 65 ) goto tr150; } else goto tr150; goto tr119; st77: if ( ++p == pe ) goto _test_eof77; case 77: switch( (*p) ) { case 13: goto tr153; case 32: goto tr153; case 34: goto tr155; case 47: goto tr156; case 60: goto tr157; case 62: goto tr158; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr154; } else if ( (*p) >= 9 ) goto tr153; goto tr152; tr152: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st78; st78: if ( ++p == pe ) goto _test_eof78; case 78: #line 2421 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr160; case 32: goto tr160; case 34: goto tr162; case 47: goto tr163; case 60: goto st80; case 62: goto tr165; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr161; } else if ( (*p) >= 9 ) goto tr160; goto st78; tr323: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st79; tr160: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st79; tr153: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st79; tr306: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st79; tr223: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st79; st79: if ( ++p == pe ) goto _test_eof79; case 79: #line 2470 "hpricot_scan.c" switch( (*p) ) { case 32: goto st79; case 34: goto tr167; case 47: goto tr169; case 62: goto tr170; case 63: goto tr168; case 95: goto tr168; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st79; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr168; } else if ( (*p) >= 65 ) goto tr168; } else goto tr168; goto st80; tr157: #line 134 "hpricot_scan.rl" { mark_aval = p; } 
goto st80; st80: if ( ++p == pe ) goto _test_eof80; case 80: #line 2499 "hpricot_scan.c" if ( (*p) == 34 ) goto tr167; goto st80; tr168: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st81; tr324: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st81; st81: if ( ++p == pe ) goto _test_eof81; case 81: #line 2543 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr171; case 34: goto tr167; case 47: goto tr173; case 61: goto tr174; case 62: goto tr175; case 63: goto st81; case 95: goto st81; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr171; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st81; } else if ( (*p) >= 65 ) goto st81; } else goto st81; goto st80; tr171: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st82; tr202: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st82; tr196: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st82; st82: if ( ++p == pe ) goto _test_eof82; case 82: #line 2589 "hpricot_scan.c" switch( (*p) ) { case 32: goto st82; case 34: goto tr167; case 47: goto tr169; case 61: goto st84; case 62: goto tr170; case 63: goto tr168; case 95: goto tr168; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st82; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr168; } else if ( (*p) >= 65 ) goto tr168; } else 
goto tr168; goto st80; tr173: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st83; tr169: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st83; tr325: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st83; st83: if ( ++p == pe ) goto _test_eof83; case 83: #line 2643 "hpricot_scan.c" switch( (*p) ) { case 34: goto tr167; case 62: goto tr178; } goto st80; tr158: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st203; tr165: #line 1 "NONE" {te = p+1;} #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st203; tr170: #line 1 "NONE" {te = p+1;} #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st203; tr175: #line 1 "NONE" {te = p+1;} #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st203; tr178: #line 1 "NONE" {te = p+1;} #line 70 "hpricot_common.rl" {act = 12;} goto st203; tr192: #line 1 "NONE" {te = p+1;} #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) 
akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 68 "hpricot_common.rl" {act = 10;} goto st203; tr193: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 68 "hpricot_common.rl" {act = 10;} goto st203; tr201: #line 1 "NONE" {te = p+1;} #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st203; tr326: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st203; st203: if ( ++p == pe ) goto _test_eof203; case 203: #line 2790 "hpricot_scan.c" if ( (*p) == 34 ) goto tr167; goto st80; tr174: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st84; st84: if ( ++p == pe ) goto _test_eof84; case 84: #line 2802 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr179; case 32: goto tr179; case 34: goto tr181; case 39: goto st134; case 47: goto tr183; case 60: goto st80; case 62: goto tr170; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr180; } else if ( (*p) >= 9 ) goto tr179; goto tr152; tr179: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st85; st85: if ( ++p == pe ) goto _test_eof85; case 85: #line 2826 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr184; case 32: goto tr184; case 34: goto tr181; case 39: goto st134; case 47: 
goto tr183; case 60: goto st80; case 62: goto tr170; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr185; } else if ( (*p) >= 9 ) goto tr184; goto tr152; tr184: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st86; tr187: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st86; st86: if ( ++p == pe ) goto _test_eof86; case 86: #line 2859 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr184; case 32: goto tr184; case 34: goto tr181; case 39: goto st134; case 47: goto tr183; case 60: goto st80; case 62: goto tr170; case 63: goto tr186; case 95: goto tr186; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr185; } else if ( (*p) >= 9 ) goto tr184; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr186; } else if ( (*p) >= 65 ) goto tr186; } else goto tr186; goto tr152; tr185: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st87; tr188: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st87; st87: if ( ++p == pe ) goto _test_eof87; case 87: #line 2903 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr187; case 32: goto tr187; case 34: goto tr189; case 39: goto st94; case 47: goto tr191; case 60: goto st80; case 62: goto tr192; case 63: goto tr186; case 95: goto tr186; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr188; } else if ( (*p) >= 9 ) goto tr187; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr186; } else if ( (*p) >= 65 ) goto tr186; } else goto tr186; goto tr152; tr189: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st88; st88: if ( ++p == pe ) goto _test_eof88; case 88: #line 2938 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr153; case 32: goto tr153; case 34: goto tr155; case 
47: goto tr191; case 60: goto tr157; case 62: goto tr193; case 63: goto tr186; case 95: goto tr186; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr154; } else if ( (*p) >= 9 ) goto tr153; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr186; } else if ( (*p) >= 65 ) goto tr186; } else goto tr186; goto tr152; tr161: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st89; tr154: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st89; tr209: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st89; tr217: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st89; st89: if ( ++p == pe ) goto _test_eof89; case 89: #line 2994 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr160; case 32: goto tr160; case 34: goto tr162; case 47: goto tr195; case 60: goto st80; case 62: goto tr192; case 63: goto tr194; case 95: goto tr194; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr161; } else if ( (*p) >= 9 ) goto tr160; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr194; } else if ( (*p) >= 65 ) goto tr194; } else goto tr194; goto st78; tr194: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st90; tr186: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto 
st90; st90: if ( ++p == pe ) goto _test_eof90; case 90: #line 3060 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr196; case 32: goto tr196; case 34: goto tr162; case 47: goto tr199; case 60: goto st80; case 61: goto tr200; case 62: goto tr201; case 63: goto st90; case 95: goto st90; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr197; } else if ( (*p) >= 9 ) goto tr196; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st90; } else if ( (*p) >= 65 ) goto st90; } else goto st90; goto st78; tr203: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st91; tr197: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st91; st91: if ( ++p == pe ) goto _test_eof91; case 91: #line 3107 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr202; case 32: goto tr202; case 34: goto tr162; case 47: goto tr195; case 60: goto st80; case 61: goto st93; case 62: goto tr192; case 63: goto tr194; case 95: goto tr194; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr203; } else if ( (*p) >= 9 ) goto tr202; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr194; } else if ( (*p) >= 65 ) goto tr194; } else goto tr194; goto st78; tr183: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st92; tr163: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st92; tr195: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 
"hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st92; tr199: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st92; tr156: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st92; tr191: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st92; st92: if ( ++p == pe ) goto _test_eof92; case 92: #line 3219 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr160; case 32: goto tr160; case 34: goto tr162; case 47: goto tr163; case 60: goto st80; case 62: goto tr165; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr161; } else if ( (*p) >= 9 ) goto tr160; goto st78; tr180: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st93; tr200: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st93; st93: if ( ++p == pe ) goto _test_eof93; case 93: #line 3246 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr187; case 32: goto tr187; case 34: goto tr189; case 39: goto st94; case 47: goto tr156; case 60: goto st80; case 62: goto tr165; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr188; } else if ( (*p) >= 9 ) goto tr187; goto tr152; st94: if ( ++p == pe ) goto _test_eof94; case 94: switch( (*p) ) { case 13: goto tr206; case 32: goto tr206; case 34: goto tr208; case 39: goto tr209; case 47: goto tr210; case 60: goto 
tr211; case 62: goto tr212; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr207; } else if ( (*p) >= 9 ) goto tr206; goto tr205; tr205: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st95; st95: if ( ++p == pe ) goto _test_eof95; case 95: #line 3289 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr214; case 32: goto tr214; case 34: goto tr216; case 39: goto tr217; case 47: goto tr218; case 60: goto st97; case 62: goto tr220; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr215; } else if ( (*p) >= 9 ) goto tr214; goto st95; tr304: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st96; tr214: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st96; tr206: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st96; st96: if ( ++p == pe ) goto _test_eof96; case 96: #line 3329 "hpricot_scan.c" switch( (*p) ) { case 32: goto st96; case 34: goto tr222; case 39: goto tr223; case 47: goto tr225; case 62: goto tr226; case 63: goto tr224; case 95: goto tr224; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st96; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr224; } else if ( (*p) >= 65 ) goto tr224; } else goto tr224; goto st97; tr211: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st97; st97: if ( ++p == pe ) goto _test_eof97; case 97: #line 3359 "hpricot_scan.c" switch( (*p) ) { case 34: goto tr222; case 39: goto tr223; } goto st97; tr318: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st98; tr247: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st98; tr314: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st98; tr305: #line 134 
"hpricot_scan.rl" { mark_aval = p; } #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st98; tr222: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st98; tr310: #line 138 "hpricot_scan.rl" { SET(aval, p); } #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st98; st98: if ( ++p == pe ) goto _test_eof98; case 98: #line 3405 "hpricot_scan.c" switch( (*p) ) { case 32: goto st98; case 39: goto tr167; case 47: goto tr230; case 62: goto tr231; case 63: goto tr229; case 95: goto tr229; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st98; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr229; } else if ( (*p) >= 65 ) goto tr229; } else goto tr229; goto st99; tr316: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st99; st99: if ( ++p == pe ) goto _test_eof99; case 99: #line 3434 "hpricot_scan.c" if ( (*p) == 39 ) goto tr167; goto st99; tr229: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st100; tr320: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st100; st100: if ( ++p == pe ) goto _test_eof100; case 100: #line 3478 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr232; case 39: goto tr167; case 47: goto tr234; case 61: goto tr235; case 62: goto tr236; case 63: goto st100; case 95: goto st100; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr232; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st100; } else if ( (*p) >= 65 ) goto st100; } else goto st100; goto st99; tr232: #line 143 "hpricot_scan.rl" 
{ SET(akey, p); } goto st101; tr260: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st101; tr254: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st101; st101: if ( ++p == pe ) goto _test_eof101; case 101: #line 3524 "hpricot_scan.c" switch( (*p) ) { case 32: goto st101; case 39: goto tr167; case 47: goto tr230; case 61: goto st103; case 62: goto tr231; case 63: goto tr229; case 95: goto tr229; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st101; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr229; } else if ( (*p) >= 65 ) goto tr229; } else goto tr229; goto st99; tr234: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st102; tr230: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st102; tr321: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st102; st102: if ( ++p == pe ) goto _test_eof102; case 102: #line 3578 "hpricot_scan.c" switch( (*p) ) { case 39: goto tr167; case 62: goto tr239; } goto st99; tr327: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st204; tr250: #line 1 "NONE" {te = p+1;} #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 
"hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st204; tr231: #line 1 "NONE" {te = p+1;} #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st204; tr236: #line 1 "NONE" {te = p+1;} #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st204; tr239: #line 1 "NONE" {te = p+1;} #line 70 "hpricot_common.rl" {act = 12;} goto st204; tr253: #line 1 "NONE" {te = p+1;} #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 68 "hpricot_common.rl" {act = 10;} goto st204; tr317: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 68 "hpricot_common.rl" {act = 10;} goto st204; tr259: #line 1 "NONE" {te = p+1;} #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st204; tr322: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto 
st204; st204: if ( ++p == pe ) goto _test_eof204; case 204: #line 3725 "hpricot_scan.c" if ( (*p) == 39 ) goto tr167; goto st99; tr235: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st103; st103: if ( ++p == pe ) goto _test_eof103; case 103: #line 3737 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr241; case 32: goto tr241; case 34: goto st130; case 39: goto tr244; case 47: goto tr245; case 60: goto st99; case 62: goto tr231; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr242; } else if ( (*p) >= 9 ) goto tr241; goto tr240; tr240: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st104; st104: if ( ++p == pe ) goto _test_eof104; case 104: #line 3761 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr247; case 32: goto tr247; case 39: goto tr162; case 47: goto tr249; case 60: goto st99; case 62: goto tr250; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr248; } else if ( (*p) >= 9 ) goto tr247; goto st104; tr248: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st105; tr315: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st105; tr272: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st105; tr216: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st105; tr208: #line 138 "hpricot_scan.rl" { SET(aval, p); } #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st105; st105: if ( ++p == pe ) goto _test_eof105; case 105: #line 3812 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr247; case 32: goto tr247; case 39: goto tr162; case 47: goto tr252; case 60: goto st99; case 62: goto tr253; case 63: goto tr251; case 95: goto tr251; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr248; } else if ( (*p) >= 9 ) goto tr247; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && 
(*p) <= 122 ) goto tr251; } else if ( (*p) >= 65 ) goto tr251; } else goto tr251; goto st104; tr251: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st106; tr270: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st106; st106: if ( ++p == pe ) goto _test_eof106; case 106: #line 3878 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr254; case 32: goto tr254; case 39: goto tr162; case 47: goto tr257; case 60: goto st99; case 61: goto tr258; case 62: goto tr259; case 63: goto st106; case 95: goto st106; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr255; } else if ( (*p) >= 9 ) goto tr254; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st106; } else if ( (*p) >= 65 ) goto st106; } else goto st106; goto st104; tr261: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st107; tr255: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st107; st107: if ( ++p == pe ) goto _test_eof107; case 107: #line 3925 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr260; case 32: goto tr260; case 39: goto tr162; case 47: goto tr252; case 60: goto st99; case 61: goto st109; case 62: goto tr253; case 63: goto tr251; case 95: goto tr251; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr261; } else if ( (*p) >= 9 ) goto tr260; } else if ( (*p) > 58 
) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr251; } else if ( (*p) >= 65 ) goto tr251; } else goto tr251; goto st104; tr245: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st108; tr249: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st108; tr252: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st108; tr257: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st108; tr267: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st108; tr271: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st108; st108: if ( ++p == pe ) goto _test_eof108; case 108: #line 4037 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr247; case 32: goto tr247; case 39: goto tr162; case 47: goto tr249; case 60: goto st99; case 62: goto tr250; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr248; 
} else if ( (*p) >= 9 ) goto tr247; goto st104; tr242: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st109; tr258: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st109; st109: if ( ++p == pe ) goto _test_eof109; case 109: #line 4064 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr263; case 32: goto tr263; case 34: goto st112; case 39: goto tr266; case 47: goto tr267; case 60: goto st99; case 62: goto tr250; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr264; } else if ( (*p) >= 9 ) goto tr263; goto tr240; tr268: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st110; tr263: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st110; st110: if ( ++p == pe ) goto _test_eof110; case 110: #line 4097 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr268; case 32: goto tr268; case 34: goto st130; case 39: goto tr244; case 47: goto tr245; case 60: goto st99; case 62: goto tr231; case 63: goto tr270; case 95: goto tr270; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr269; } else if ( (*p) >= 9 ) goto tr268; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr270; } else if ( (*p) >= 65 ) goto tr270; } else goto tr270; goto tr240; tr269: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st111; tr264: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st111; st111: if ( ++p == pe ) goto _test_eof111; case 111: #line 4141 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr263; case 32: goto tr263; case 34: goto st112; case 39: goto tr266; case 47: goto tr271; case 60: goto st99; case 62: goto tr253; case 63: goto tr270; case 95: goto tr270; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr264; } else if ( (*p) >= 9 ) goto tr263; } 
else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr270; } else if ( (*p) >= 65 ) goto tr270; } else goto tr270; goto tr240; st112: if ( ++p == pe ) goto _test_eof112; case 112: switch( (*p) ) { case 13: goto tr206; case 32: goto tr206; case 34: goto tr272; case 39: goto tr209; case 47: goto tr210; case 60: goto tr211; case 62: goto tr212; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr207; } else if ( (*p) >= 9 ) goto tr206; goto tr205; tr215: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st113; tr207: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st113; st113: if ( ++p == pe ) goto _test_eof113; case 113: #line 4207 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr214; case 32: goto tr214; case 34: goto tr216; case 39: goto tr217; case 47: goto tr274; case 60: goto st97; case 62: goto tr275; case 63: goto tr273; case 95: goto tr273; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr215; } else if ( (*p) >= 9 ) goto tr214; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr273; } else if ( (*p) >= 65 ) goto tr273; } else goto tr273; goto st95; tr273: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st114; tr297: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st114; st114: if ( ++p == pe ) goto 
_test_eof114; case 114: #line 4274 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr276; case 32: goto tr276; case 34: goto tr216; case 39: goto tr217; case 47: goto tr279; case 60: goto st97; case 61: goto tr280; case 62: goto tr281; case 63: goto st114; case 95: goto st114; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr277; } else if ( (*p) >= 9 ) goto tr276; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st114; } else if ( (*p) >= 65 ) goto st114; } else goto st114; goto st95; tr284: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st115; tr311: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st115; tr276: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st115; st115: if ( ++p == pe ) goto _test_eof115; case 115: #line 4326 "hpricot_scan.c" switch( (*p) ) { case 32: goto st115; case 34: goto tr222; case 39: goto tr223; case 47: goto tr225; case 61: goto st118; case 62: goto tr226; case 63: goto tr224; case 95: goto tr224; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st115; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr224; } else if ( (*p) >= 65 ) goto tr224; } else goto tr224; goto st97; tr224: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { mark_akey = p; } goto st116; tr307: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 161 "hpricot_scan.rl" { akey = Qnil; aval = Qnil; mark_akey = NULL; mark_aval = NULL; } #line 135 "hpricot_scan.rl" { 
mark_akey = p; } goto st116; st116: if ( ++p == pe ) goto _test_eof116; case 116: #line 4389 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr284; case 34: goto tr222; case 39: goto tr223; case 47: goto tr286; case 61: goto tr287; case 62: goto tr288; case 63: goto st116; case 95: goto st116; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr284; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st116; } else if ( (*p) >= 65 ) goto st116; } else goto st116; goto st97; tr286: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st117; tr225: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st117; tr308: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st117; st117: if ( ++p == pe ) goto _test_eof117; case 117: #line 4444 "hpricot_scan.c" switch( (*p) ) { case 34: goto tr222; case 39: goto tr223; case 62: goto tr289; } goto st97; tr212: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st205; tr220: #line 1 "NONE" {te = p+1;} #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st205; tr226: #line 1 "NONE" {te = p+1;} #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, 
s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st205; tr288: #line 1 "NONE" {te = p+1;} #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st205; tr289: #line 1 "NONE" {te = p+1;} #line 70 "hpricot_common.rl" {act = 12;} goto st205; tr275: #line 1 "NONE" {te = p+1;} #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 68 "hpricot_common.rl" {act = 10;} goto st205; tr303: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 68 "hpricot_common.rl" {act = 10;} goto st205; tr281: #line 1 "NONE" {te = p+1;} #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st205; tr309: #line 1 "NONE" {te = p+1;} #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 68 "hpricot_common.rl" {act = 10;} goto st205; st205: if ( ++p == pe ) goto _test_eof205; case 205: #line 4592 "hpricot_scan.c" switch( (*p) ) { case 34: goto tr222; case 39: goto tr223; } goto st97; tr287: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st118; st118: if ( ++p == pe ) goto _test_eof118; case 118: 
#line 4606 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr290; case 32: goto tr290; case 34: goto tr292; case 39: goto tr293; case 47: goto tr294; case 60: goto st97; case 62: goto tr226; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr291; } else if ( (*p) >= 9 ) goto tr290; goto tr205; tr290: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st119; st119: if ( ++p == pe ) goto _test_eof119; case 119: #line 4630 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr295; case 32: goto tr295; case 34: goto tr292; case 39: goto tr293; case 47: goto tr294; case 60: goto st97; case 62: goto tr226; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr296; } else if ( (*p) >= 9 ) goto tr295; goto tr205; tr295: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st120; tr298: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st120; st120: if ( ++p == pe ) goto _test_eof120; case 120: #line 4663 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr295; case 32: goto tr295; case 34: goto tr292; case 39: goto tr293; case 47: goto tr294; case 60: goto st97; case 62: goto tr226; case 63: goto tr297; case 95: goto tr297; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr296; } else if ( (*p) >= 9 ) goto tr295; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr297; } else if ( (*p) >= 65 ) goto tr297; } else goto tr297; goto tr205; tr296: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st121; tr299: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st121; st121: if ( ++p == pe ) goto _test_eof121; case 121: #line 4707 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr298; case 32: goto tr298; case 34: goto tr300; case 39: goto tr301; case 47: goto tr302; case 60: goto 
st97; case 62: goto tr275; case 63: goto tr297; case 95: goto tr297; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr299; } else if ( (*p) >= 9 ) goto tr298; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr297; } else if ( (*p) >= 65 ) goto tr297; } else goto tr297; goto tr205; tr300: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st122; st122: if ( ++p == pe ) goto _test_eof122; case 122: #line 4742 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr206; case 32: goto tr206; case 34: goto tr272; case 39: goto tr209; case 47: goto tr302; case 60: goto tr211; case 62: goto tr303; case 63: goto tr297; case 95: goto tr297; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr207; } else if ( (*p) >= 9 ) goto tr206; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr297; } else if ( (*p) >= 65 ) goto tr297; } else goto tr297; goto tr205; tr294: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st123; tr218: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st123; tr274: #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st123; tr279: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st123; tr210: #line 134 "hpricot_scan.rl" { 
mark_aval = p; } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } goto st123; tr302: #line 134 "hpricot_scan.rl" { mark_aval = p; } #line 168 "hpricot_scan.rl" { if (!S->xml && !NIL_P(akey)) akey = rb_funcall(akey, s_downcase, 0); ATTR(akey, aval); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st123; st123: if ( ++p == pe ) goto _test_eof123; case 123: #line 4854 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr214; case 32: goto tr214; case 34: goto tr216; case 39: goto tr217; case 47: goto tr218; case 60: goto st97; case 62: goto tr220; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr215; } else if ( (*p) >= 9 ) goto tr214; goto st95; tr301: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st124; st124: if ( ++p == pe ) goto _test_eof124; case 124: #line 4878 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr206; case 32: goto tr206; case 34: goto tr208; case 39: goto tr209; case 47: goto tr302; case 60: goto tr211; case 62: goto tr303; case 63: goto tr297; case 95: goto tr297; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr207; } else if ( (*p) >= 9 ) goto tr206; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr297; } else if ( (*p) >= 65 ) goto tr297; } else goto tr297; goto tr205; tr292: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st125; st125: if ( ++p == pe ) goto _test_eof125; case 125: #line 4913 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr304; case 34: goto tr305; case 39: goto tr306; case 47: goto tr308; case 62: goto tr309; case 63: goto tr307; case 95: goto tr307; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr304; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto 
tr307; } else if ( (*p) >= 65 ) goto tr307; } else goto tr307; goto tr211; tr293: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st126; st126: if ( ++p == pe ) goto _test_eof126; case 126: #line 4943 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr304; case 34: goto tr310; case 39: goto tr306; case 47: goto tr308; case 62: goto tr309; case 63: goto tr307; case 95: goto tr307; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr304; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr307; } else if ( (*p) >= 65 ) goto tr307; } else goto tr307; goto tr211; tr291: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st127; tr280: #line 143 "hpricot_scan.rl" { SET(akey, p); } goto st127; st127: if ( ++p == pe ) goto _test_eof127; case 127: #line 4977 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr298; case 32: goto tr298; case 34: goto tr300; case 39: goto tr301; case 47: goto tr210; case 60: goto st97; case 62: goto tr220; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr299; } else if ( (*p) >= 9 ) goto tr298; goto tr205; tr312: #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st128; tr277: #line 143 "hpricot_scan.rl" { SET(akey, p); } #line 139 "hpricot_scan.rl" { if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); } else { SET(aval, p); } } goto st128; st128: if ( ++p == pe ) goto _test_eof128; case 128: #line 5013 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr311; case 32: goto tr311; case 34: goto tr216; case 39: goto tr217; case 47: goto tr274; case 60: goto st97; case 61: goto st127; case 62: goto tr275; case 63: goto tr273; case 95: goto tr273; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr312; } else if ( (*p) >= 9 ) goto tr311; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr273; } else if ( (*p) >= 65 ) goto tr273; } else goto tr273; goto st95; tr266: #line 
138 "hpricot_scan.rl" { SET(aval, p); } goto st129; st129: if ( ++p == pe ) goto _test_eof129; case 129: #line 5049 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr314; case 32: goto tr314; case 39: goto tr155; case 47: goto tr271; case 60: goto tr316; case 62: goto tr317; case 63: goto tr270; case 95: goto tr270; } if ( (*p) < 45 ) { if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr315; } else if ( (*p) >= 9 ) goto tr314; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr270; } else if ( (*p) >= 65 ) goto tr270; } else goto tr270; goto tr240; st130: if ( ++p == pe ) goto _test_eof130; case 130: switch( (*p) ) { case 34: goto tr305; case 39: goto tr306; } goto tr211; tr244: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st131; st131: if ( ++p == pe ) goto _test_eof131; case 131: #line 5092 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr318; case 39: goto tr319; case 47: goto tr321; case 62: goto tr322; case 63: goto tr320; case 95: goto tr320; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr318; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr320; } else if ( (*p) >= 65 ) goto tr320; } else goto tr320; goto tr316; tr241: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st132; st132: if ( ++p == pe ) goto _test_eof132; case 132: #line 5121 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr268; case 32: goto tr268; case 34: goto st130; case 39: goto tr244; case 47: goto tr245; case 60: goto st99; case 62: goto tr231; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr269; } else if ( (*p) >= 9 ) goto tr268; goto tr240; tr181: #line 138 "hpricot_scan.rl" { SET(aval, p); } goto st133; st133: if ( ++p == pe ) goto _test_eof133; case 133: #line 5145 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr323; case 34: goto tr319; case 47: goto tr325; case 62: goto tr326; case 63: goto tr324; case 95: goto tr324; } if ( (*p) < 45 ) { if ( 9 <= (*p) && (*p) <= 13 ) 
goto tr323; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr324; } else if ( (*p) >= 65 ) goto tr324; } else goto tr324; goto tr157; st134: if ( ++p == pe ) goto _test_eof134; case 134: switch( (*p) ) { case 34: goto tr310; case 39: goto tr306; } goto tr211; st135: if ( ++p == pe ) goto _test_eof135; case 135: switch( (*p) ) { case 13: goto tr314; case 32: goto tr314; case 39: goto tr155; case 47: goto tr267; case 60: goto tr316; case 62: goto tr327; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr315; } else if ( (*p) >= 9 ) goto tr314; goto tr240; st136: if ( ++p == pe ) goto _test_eof136; case 136: if ( (*p) == 34 ) goto tr319; goto tr157; st137: if ( ++p == pe ) goto _test_eof137; case 137: if ( (*p) == 39 ) goto tr319; goto tr316; tr120: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st138; st138: if ( ++p == pe ) goto _test_eof138; case 138: #line 5215 "hpricot_scan.c" switch( (*p) ) { case 13: goto tr148; case 32: goto tr148; case 34: goto st136; case 39: goto st137; case 47: goto tr124; case 60: goto tr39; case 62: goto tr117; } if ( (*p) > 10 ) { if ( 11 <= (*p) && (*p) <= 12 ) goto tr149; } else if ( (*p) >= 9 ) goto tr148; goto tr119; st139: if ( ++p == pe ) goto _test_eof139; case 139: switch( (*p) ) { case 58: goto tr328; case 95: goto tr328; case 120: goto tr329; } if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr328; } else if ( (*p) >= 65 ) goto tr328; goto tr0; tr328: #line 46 "hpricot_common.rl" { TEXT_PASS(); } goto st140; st140: if ( ++p == pe ) goto _test_eof140; case 140: #line 5254 "hpricot_scan.c" switch( (*p) ) { case 32: goto st206; case 63: goto st140; case 95: goto st140; } if ( (*p) < 48 ) { if ( (*p) > 13 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st140; } else if ( (*p) >= 9 ) goto st206; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st140; } else if ( (*p) >= 65 ) goto st140; } else goto st140; goto tr0; st206: if ( ++p == pe ) goto 
_test_eof206; case 206: if ( (*p) == 32 ) goto st206; if ( 9 <= (*p) && (*p) <= 13 ) goto st206; goto tr406; tr329: #line 46 "hpricot_common.rl" { TEXT_PASS(); } goto st141; st141: if ( ++p == pe ) goto _test_eof141; case 141: #line 5292 "hpricot_scan.c" switch( (*p) ) { case 32: goto st206; case 63: goto st140; case 95: goto st140; case 109: goto st142; } if ( (*p) < 48 ) { if ( (*p) > 13 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st140; } else if ( (*p) >= 9 ) goto st206; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st140; } else if ( (*p) >= 65 ) goto st140; } else goto st140; goto tr0; st142: if ( ++p == pe ) goto _test_eof142; case 142: switch( (*p) ) { case 32: goto st206; case 63: goto st140; case 95: goto st140; case 108: goto st143; } if ( (*p) < 48 ) { if ( (*p) > 13 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st140; } else if ( (*p) >= 9 ) goto st206; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st140; } else if ( (*p) >= 65 ) goto st140; } else goto st140; goto tr0; st143: if ( ++p == pe ) goto _test_eof143; case 143: switch( (*p) ) { case 32: goto tr334; case 63: goto st140; case 95: goto st140; } if ( (*p) < 48 ) { if ( (*p) > 13 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st140; } else if ( (*p) >= 9 ) goto tr334; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st140; } else if ( (*p) >= 65 ) goto st140; } else goto st140; goto tr0; tr334: #line 1 "NONE" {te = p+1;} goto st207; st207: if ( ++p == pe ) goto _test_eof207; case 207: #line 5371 "hpricot_scan.c" switch( (*p) ) { case 32: goto tr334; case 118: goto st144; } if ( 9 <= (*p) && (*p) <= 13 ) goto tr334; goto tr406; st144: if ( ++p == pe ) goto _test_eof144; case 144: if ( (*p) == 101 ) goto st145; goto tr335; st145: if ( ++p == pe ) goto _test_eof145; case 145: if ( (*p) == 114 ) goto st146; goto tr335; st146: if ( ++p == pe ) goto _test_eof146; case 146: if ( (*p) == 115 ) goto st147; goto 
tr335; st147: if ( ++p == pe ) goto _test_eof147; case 147: if ( (*p) == 105 ) goto st148; goto tr335; st148: if ( ++p == pe ) goto _test_eof148; case 148: if ( (*p) == 111 ) goto st149; goto tr335; st149: if ( ++p == pe ) goto _test_eof149; case 149: if ( (*p) == 110 ) goto st150; goto tr335; st150: if ( ++p == pe ) goto _test_eof150; case 150: switch( (*p) ) { case 32: goto st150; case 61: goto st151; } if ( 9 <= (*p) && (*p) <= 13 ) goto st150; goto tr335; st151: if ( ++p == pe ) goto _test_eof151; case 151: switch( (*p) ) { case 32: goto st151; case 34: goto st152; case 39: goto st194; } if ( 9 <= (*p) && (*p) <= 13 ) goto st151; goto tr335; st152: if ( ++p == pe ) goto _test_eof152; case 152: if ( (*p) == 95 ) goto tr345; if ( (*p) < 48 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto tr345; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr345; } else if ( (*p) >= 65 ) goto tr345; } else goto tr345; goto tr335; tr345: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st153; st153: if ( ++p == pe ) goto _test_eof153; case 153: #line 5470 "hpricot_scan.c" switch( (*p) ) { case 34: goto tr346; case 95: goto st153; } if ( (*p) < 48 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st153; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st153; } else if ( (*p) >= 65 ) goto st153; } else goto st153; goto tr335; tr346: #line 144 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("version")), aval); } goto st154; st154: if ( ++p == pe ) goto _test_eof154; case 154: #line 5495 "hpricot_scan.c" switch( (*p) ) { case 32: goto st155; case 62: goto tr349; case 63: goto st156; } if ( 9 <= (*p) && (*p) <= 13 ) goto st155; goto tr335; st155: if ( ++p == pe ) goto _test_eof155; case 155: switch( (*p) ) { case 32: goto st155; case 62: goto tr349; case 63: goto st156; case 101: goto st157; case 115: goto st170; } if ( 9 <= (*p) && (*p) <= 13 ) goto st155; goto tr335; st156: if ( ++p == pe ) goto _test_eof156; case 156: 
if ( (*p) == 62 ) goto tr349; goto tr335; st157: if ( ++p == pe ) goto _test_eof157; case 157: if ( (*p) == 110 ) goto st158; goto tr335; st158: if ( ++p == pe ) goto _test_eof158; case 158: if ( (*p) == 99 ) goto st159; goto tr335; st159: if ( ++p == pe ) goto _test_eof159; case 159: if ( (*p) == 111 ) goto st160; goto tr335; st160: if ( ++p == pe ) goto _test_eof160; case 160: if ( (*p) == 100 ) goto st161; goto tr335; st161: if ( ++p == pe ) goto _test_eof161; case 161: if ( (*p) == 105 ) goto st162; goto tr335; st162: if ( ++p == pe ) goto _test_eof162; case 162: if ( (*p) == 110 ) goto st163; goto tr335; st163: if ( ++p == pe ) goto _test_eof163; case 163: if ( (*p) == 103 ) goto st164; goto tr335; st164: if ( ++p == pe ) goto _test_eof164; case 164: switch( (*p) ) { case 32: goto st164; case 61: goto st165; } if ( 9 <= (*p) && (*p) <= 13 ) goto st164; goto tr335; st165: if ( ++p == pe ) goto _test_eof165; case 165: switch( (*p) ) { case 32: goto st165; case 34: goto st166; case 39: goto st192; } if ( 9 <= (*p) && (*p) <= 13 ) goto st165; goto tr335; st166: if ( ++p == pe ) goto _test_eof166; case 166: if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr363; } else if ( (*p) >= 65 ) goto tr363; goto tr335; tr363: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st167; st167: if ( ++p == pe ) goto _test_eof167; case 167: #line 5615 "hpricot_scan.c" switch( (*p) ) { case 34: goto tr364; case 95: goto st167; } if ( (*p) < 48 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st167; } else if ( (*p) > 57 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st167; } else if ( (*p) >= 65 ) goto st167; } else goto st167; goto tr335; tr364: #line 145 "hpricot_scan.rl" { #ifdef HAVE_RUBY_ENCODING_H if (mark_aval < p) { char psave = *p; *p = '\0'; encoding_index = rb_enc_find_index(mark_aval); *p = psave; } #endif SET(aval, p); ATTR(ID2SYM(rb_intern("encoding")), aval); } goto st168; st168: if ( ++p == pe ) goto _test_eof168; case 168: #line 5651 
"hpricot_scan.c" switch( (*p) ) { case 32: goto st169; case 62: goto tr349; case 63: goto st156; } if ( 9 <= (*p) && (*p) <= 13 ) goto st169; goto tr335; st169: if ( ++p == pe ) goto _test_eof169; case 169: switch( (*p) ) { case 32: goto st169; case 62: goto tr349; case 63: goto st156; case 115: goto st170; } if ( 9 <= (*p) && (*p) <= 13 ) goto st169; goto tr335; st170: if ( ++p == pe ) goto _test_eof170; case 170: if ( (*p) == 116 ) goto st171; goto tr335; st171: if ( ++p == pe ) goto _test_eof171; case 171: if ( (*p) == 97 ) goto st172; goto tr335; st172: if ( ++p == pe ) goto _test_eof172; case 172: if ( (*p) == 110 ) goto st173; goto tr335; st173: if ( ++p == pe ) goto _test_eof173; case 173: if ( (*p) == 100 ) goto st174; goto tr335; st174: if ( ++p == pe ) goto _test_eof174; case 174: if ( (*p) == 97 ) goto st175; goto tr335; st175: if ( ++p == pe ) goto _test_eof175; case 175: if ( (*p) == 108 ) goto st176; goto tr335; st176: if ( ++p == pe ) goto _test_eof176; case 176: if ( (*p) == 111 ) goto st177; goto tr335; st177: if ( ++p == pe ) goto _test_eof177; case 177: if ( (*p) == 110 ) goto st178; goto tr335; st178: if ( ++p == pe ) goto _test_eof178; case 178: if ( (*p) == 101 ) goto st179; goto tr335; st179: if ( ++p == pe ) goto _test_eof179; case 179: switch( (*p) ) { case 32: goto st179; case 61: goto st180; } if ( 9 <= (*p) && (*p) <= 13 ) goto st179; goto tr335; st180: if ( ++p == pe ) goto _test_eof180; case 180: switch( (*p) ) { case 32: goto st180; case 34: goto st181; case 39: goto st187; } if ( 9 <= (*p) && (*p) <= 13 ) goto st180; goto tr335; st181: if ( ++p == pe ) goto _test_eof181; case 181: switch( (*p) ) { case 110: goto tr379; case 121: goto tr380; } goto tr335; tr379: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st182; st182: if ( ++p == pe ) goto _test_eof182; case 182: #line 5776 "hpricot_scan.c" if ( (*p) == 111 ) goto st183; goto tr335; st183: if ( ++p == pe ) goto _test_eof183; case 183: if ( (*p) == 34 ) goto tr382; goto tr335; 
tr382: #line 157 "hpricot_scan.rl" { SET(aval, p); ATTR(ID2SYM(rb_intern("standalone")), aval); } goto st184; st184: if ( ++p == pe ) goto _test_eof184; case 184: #line 5795 "hpricot_scan.c" switch( (*p) ) { case 32: goto st184; case 62: goto tr349; case 63: goto st156; } if ( 9 <= (*p) && (*p) <= 13 ) goto st184; goto tr335; tr380: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st185; st185: if ( ++p == pe ) goto _test_eof185; case 185: #line 5812 "hpricot_scan.c" if ( (*p) == 101 ) goto st186; goto tr335; st186: if ( ++p == pe ) goto _test_eof186; case 186: if ( (*p) == 115 ) goto st183; goto tr335; st187: if ( ++p == pe ) goto _test_eof187; case 187: switch( (*p) ) { case 110: goto tr385; case 121: goto tr386; } goto tr335; tr385: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st188; st188: if ( ++p == pe ) goto _test_eof188; case 188: #line 5840 "hpricot_scan.c" if ( (*p) == 111 ) goto st189; goto tr335; st189: if ( ++p == pe ) goto _test_eof189; case 189: if ( (*p) == 39 ) goto tr382; goto tr335; tr386: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st190; st190: if ( ++p == pe ) goto _test_eof190; case 190: #line 5859 "hpricot_scan.c" if ( (*p) == 101 ) goto st191; goto tr335; st191: if ( ++p == pe ) goto _test_eof191; case 191: if ( (*p) == 115 ) goto st189; goto tr335; st192: if ( ++p == pe ) goto _test_eof192; case 192: if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr389; } else if ( (*p) >= 65 ) goto tr389; goto tr335; tr389: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st193; st193: if ( ++p == pe ) goto _test_eof193; case 193: #line 5888 "hpricot_scan.c" switch( (*p) ) { case 39: goto tr364; case 95: goto st193; } if ( (*p) < 48 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st193; } else if ( (*p) > 57 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st193; } else if ( (*p) >= 65 ) goto st193; } else goto st193; goto tr335; st194: if ( ++p == pe ) goto _test_eof194; case 194: if ( (*p) == 95 ) goto tr391; if ( 
(*p) < 48 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto tr391; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr391; } else if ( (*p) >= 65 ) goto tr391; } else goto tr391; goto tr335; tr391: #line 134 "hpricot_scan.rl" { mark_aval = p; } goto st195; st195: if ( ++p == pe ) goto _test_eof195; case 195: #line 5931 "hpricot_scan.c" switch( (*p) ) { case 39: goto tr346; case 95: goto st195; } if ( (*p) < 48 ) { if ( 45 <= (*p) && (*p) <= 46 ) goto st195; } else if ( (*p) > 58 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st195; } else if ( (*p) >= 65 ) goto st195; } else goto st195; goto tr335; tr393: #line 51 "hpricot_common.rl" {{p = ((te))-1;}{ TEXT_PASS(); }} goto st208; tr394: #line 50 "hpricot_common.rl" { EBLK(comment, 3); {goto st198;} } #line 50 "hpricot_common.rl" {te = p+1;} goto st208; tr408: #line 51 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} goto st208; tr409: #line 9 "hpricot_common.rl" {curline += 1;} #line 51 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} goto st208; tr411: #line 51 "hpricot_common.rl" {te = p;p--;{ TEXT_PASS(); }} goto st208; st208: #line 1 "NONE" {ts = 0;} if ( ++p == pe ) goto _test_eof208; case 208: #line 1 "NONE" {ts = p;} #line 5980 "hpricot_scan.c" switch( (*p) ) { case 10: goto tr409; case 45: goto tr410; } goto tr408; tr410: #line 1 "NONE" {te = p+1;} goto st209; st209: if ( ++p == pe ) goto _test_eof209; case 209: #line 5994 "hpricot_scan.c" if ( (*p) == 45 ) goto st196; goto tr411; st196: if ( ++p == pe ) goto _test_eof196; case 196: if ( (*p) == 62 ) goto tr394; goto tr393; tr395: #line 56 "hpricot_common.rl" {{p = ((te))-1;}{ TEXT_PASS(); }} goto st210; tr396: #line 55 "hpricot_common.rl" { EBLK(cdata, 3); {goto st198;} } #line 55 "hpricot_common.rl" {te = p+1;} goto st210; tr413: #line 56 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} goto st210; tr414: #line 9 "hpricot_common.rl" {curline += 1;} #line 56 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} goto st210; 
tr416: #line 56 "hpricot_common.rl" {te = p;p--;{ TEXT_PASS(); }} goto st210; st210: #line 1 "NONE" {ts = 0;} if ( ++p == pe ) goto _test_eof210; case 210: #line 1 "NONE" {ts = p;} #line 6037 "hpricot_scan.c" switch( (*p) ) { case 10: goto tr414; case 93: goto tr415; } goto tr413; tr415: #line 1 "NONE" {te = p+1;} goto st211; st211: if ( ++p == pe ) goto _test_eof211; case 211: #line 6051 "hpricot_scan.c" if ( (*p) == 93 ) goto st197; goto tr416; st197: if ( ++p == pe ) goto _test_eof197; case 197: if ( (*p) == 62 ) goto tr396; goto tr395; tr418: #line 61 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} goto st212; tr419: #line 9 "hpricot_common.rl" {curline += 1;} #line 61 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} goto st212; tr420: #line 60 "hpricot_common.rl" { EBLK(procins, 2); {goto st198;} } #line 60 "hpricot_common.rl" {te = p+1;} goto st212; tr422: #line 61 "hpricot_common.rl" {te = p;p--;{ TEXT_PASS(); }} goto st212; st212: #line 1 "NONE" {ts = 0;} if ( ++p == pe ) goto _test_eof212; case 212: #line 1 "NONE" {ts = p;} #line 6090 "hpricot_scan.c" switch( (*p) ) { case 10: goto tr419; case 62: goto tr420; case 63: goto st213; } goto tr418; st213: if ( ++p == pe ) goto _test_eof213; case 213: if ( (*p) == 62 ) goto tr420; goto tr422; } _test_eof198: cs = 198; goto _test_eof; _test_eof199: cs = 199; goto _test_eof; _test_eof0: cs = 0; goto _test_eof; _test_eof1: cs = 1; goto _test_eof; _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; _test_eof13: cs = 13; goto _test_eof; _test_eof14: cs = 14; goto _test_eof; _test_eof15: cs = 15; goto _test_eof; _test_eof16: cs = 16; goto _test_eof; _test_eof17: cs = 17; goto 
_test_eof; _test_eof18: cs = 18; goto _test_eof; _test_eof19: cs = 19; goto _test_eof; _test_eof20: cs = 20; goto _test_eof; _test_eof21: cs = 21; goto _test_eof; _test_eof22: cs = 22; goto _test_eof; _test_eof23: cs = 23; goto _test_eof; _test_eof24: cs = 24; goto _test_eof; _test_eof25: cs = 25; goto _test_eof; _test_eof26: cs = 26; goto _test_eof; _test_eof27: cs = 27; goto _test_eof; _test_eof28: cs = 28; goto _test_eof; _test_eof29: cs = 29; goto _test_eof; _test_eof30: cs = 30; goto _test_eof; _test_eof31: cs = 31; goto _test_eof; _test_eof32: cs = 32; goto _test_eof; _test_eof33: cs = 33; goto _test_eof; _test_eof34: cs = 34; goto _test_eof; _test_eof35: cs = 35; goto _test_eof; _test_eof36: cs = 36; goto _test_eof; _test_eof37: cs = 37; goto _test_eof; _test_eof38: cs = 38; goto _test_eof; _test_eof39: cs = 39; goto _test_eof; _test_eof200: cs = 200; goto _test_eof; _test_eof40: cs = 40; goto _test_eof; _test_eof41: cs = 41; goto _test_eof; _test_eof201: cs = 201; goto _test_eof; _test_eof42: cs = 42; goto _test_eof; _test_eof43: cs = 43; goto _test_eof; _test_eof202: cs = 202; goto _test_eof; _test_eof44: cs = 44; goto _test_eof; _test_eof45: cs = 45; goto _test_eof; _test_eof46: cs = 46; goto _test_eof; _test_eof47: cs = 47; goto _test_eof; _test_eof48: cs = 48; goto _test_eof; _test_eof49: cs = 49; goto _test_eof; _test_eof50: cs = 50; goto _test_eof; _test_eof51: cs = 51; goto _test_eof; _test_eof52: cs = 52; goto _test_eof; _test_eof53: cs = 53; goto _test_eof; _test_eof54: cs = 54; goto _test_eof; _test_eof55: cs = 55; goto _test_eof; _test_eof56: cs = 56; goto _test_eof; _test_eof57: cs = 57; goto _test_eof; _test_eof58: cs = 58; goto _test_eof; _test_eof59: cs = 59; goto _test_eof; _test_eof60: cs = 60; goto _test_eof; _test_eof61: cs = 61; goto _test_eof; _test_eof62: cs = 62; goto _test_eof; _test_eof63: cs = 63; goto _test_eof; _test_eof64: cs = 64; goto _test_eof; _test_eof65: cs = 65; goto _test_eof; _test_eof66: cs = 66; goto _test_eof; 
_test_eof67: cs = 67; goto _test_eof; _test_eof68: cs = 68; goto _test_eof; _test_eof69: cs = 69; goto _test_eof; _test_eof70: cs = 70; goto _test_eof; _test_eof71: cs = 71; goto _test_eof; _test_eof72: cs = 72; goto _test_eof; _test_eof73: cs = 73; goto _test_eof; _test_eof74: cs = 74; goto _test_eof; _test_eof75: cs = 75; goto _test_eof; _test_eof76: cs = 76; goto _test_eof; _test_eof77: cs = 77; goto _test_eof; _test_eof78: cs = 78; goto _test_eof; _test_eof79: cs = 79; goto _test_eof; _test_eof80: cs = 80; goto _test_eof; _test_eof81: cs = 81; goto _test_eof; _test_eof82: cs = 82; goto _test_eof; _test_eof83: cs = 83; goto _test_eof; _test_eof203: cs = 203; goto _test_eof; _test_eof84: cs = 84; goto _test_eof; _test_eof85: cs = 85; goto _test_eof; _test_eof86: cs = 86; goto _test_eof; _test_eof87: cs = 87; goto _test_eof; _test_eof88: cs = 88; goto _test_eof; _test_eof89: cs = 89; goto _test_eof; _test_eof90: cs = 90; goto _test_eof; _test_eof91: cs = 91; goto _test_eof; _test_eof92: cs = 92; goto _test_eof; _test_eof93: cs = 93; goto _test_eof; _test_eof94: cs = 94; goto _test_eof; _test_eof95: cs = 95; goto _test_eof; _test_eof96: cs = 96; goto _test_eof; _test_eof97: cs = 97; goto _test_eof; _test_eof98: cs = 98; goto _test_eof; _test_eof99: cs = 99; goto _test_eof; _test_eof100: cs = 100; goto _test_eof; _test_eof101: cs = 101; goto _test_eof; _test_eof102: cs = 102; goto _test_eof; _test_eof204: cs = 204; goto _test_eof; _test_eof103: cs = 103; goto _test_eof; _test_eof104: cs = 104; goto _test_eof; _test_eof105: cs = 105; goto _test_eof; _test_eof106: cs = 106; goto _test_eof; _test_eof107: cs = 107; goto _test_eof; _test_eof108: cs = 108; goto _test_eof; _test_eof109: cs = 109; goto _test_eof; _test_eof110: cs = 110; goto _test_eof; _test_eof111: cs = 111; goto _test_eof; _test_eof112: cs = 112; goto _test_eof; _test_eof113: cs = 113; goto _test_eof; _test_eof114: cs = 114; goto _test_eof; _test_eof115: cs = 115; goto _test_eof; _test_eof116: cs = 116; 
goto _test_eof; _test_eof117: cs = 117; goto _test_eof; _test_eof205: cs = 205; goto _test_eof; _test_eof118: cs = 118; goto _test_eof; _test_eof119: cs = 119; goto _test_eof; _test_eof120: cs = 120; goto _test_eof; _test_eof121: cs = 121; goto _test_eof; _test_eof122: cs = 122; goto _test_eof; _test_eof123: cs = 123; goto _test_eof; _test_eof124: cs = 124; goto _test_eof; _test_eof125: cs = 125; goto _test_eof; _test_eof126: cs = 126; goto _test_eof; _test_eof127: cs = 127; goto _test_eof; _test_eof128: cs = 128; goto _test_eof; _test_eof129: cs = 129; goto _test_eof; _test_eof130: cs = 130; goto _test_eof; _test_eof131: cs = 131; goto _test_eof; _test_eof132: cs = 132; goto _test_eof; _test_eof133: cs = 133; goto _test_eof; _test_eof134: cs = 134; goto _test_eof; _test_eof135: cs = 135; goto _test_eof; _test_eof136: cs = 136; goto _test_eof; _test_eof137: cs = 137; goto _test_eof; _test_eof138: cs = 138; goto _test_eof; _test_eof139: cs = 139; goto _test_eof; _test_eof140: cs = 140; goto _test_eof; _test_eof206: cs = 206; goto _test_eof; _test_eof141: cs = 141; goto _test_eof; _test_eof142: cs = 142; goto _test_eof; _test_eof143: cs = 143; goto _test_eof; _test_eof207: cs = 207; goto _test_eof; _test_eof144: cs = 144; goto _test_eof; _test_eof145: cs = 145; goto _test_eof; _test_eof146: cs = 146; goto _test_eof; _test_eof147: cs = 147; goto _test_eof; _test_eof148: cs = 148; goto _test_eof; _test_eof149: cs = 149; goto _test_eof; _test_eof150: cs = 150; goto _test_eof; _test_eof151: cs = 151; goto _test_eof; _test_eof152: cs = 152; goto _test_eof; _test_eof153: cs = 153; goto _test_eof; _test_eof154: cs = 154; goto _test_eof; _test_eof155: cs = 155; goto _test_eof; _test_eof156: cs = 156; goto _test_eof; _test_eof157: cs = 157; goto _test_eof; _test_eof158: cs = 158; goto _test_eof; _test_eof159: cs = 159; goto _test_eof; _test_eof160: cs = 160; goto _test_eof; _test_eof161: cs = 161; goto _test_eof; _test_eof162: cs = 162; goto _test_eof; _test_eof163: cs = 163; 
goto _test_eof; _test_eof164: cs = 164; goto _test_eof; _test_eof165: cs = 165; goto _test_eof; _test_eof166: cs = 166; goto _test_eof; _test_eof167: cs = 167; goto _test_eof; _test_eof168: cs = 168; goto _test_eof; _test_eof169: cs = 169; goto _test_eof; _test_eof170: cs = 170; goto _test_eof; _test_eof171: cs = 171; goto _test_eof; _test_eof172: cs = 172; goto _test_eof; _test_eof173: cs = 173; goto _test_eof; _test_eof174: cs = 174; goto _test_eof; _test_eof175: cs = 175; goto _test_eof; _test_eof176: cs = 176; goto _test_eof; _test_eof177: cs = 177; goto _test_eof; _test_eof178: cs = 178; goto _test_eof; _test_eof179: cs = 179; goto _test_eof; _test_eof180: cs = 180; goto _test_eof; _test_eof181: cs = 181; goto _test_eof; _test_eof182: cs = 182; goto _test_eof; _test_eof183: cs = 183; goto _test_eof; _test_eof184: cs = 184; goto _test_eof; _test_eof185: cs = 185; goto _test_eof; _test_eof186: cs = 186; goto _test_eof; _test_eof187: cs = 187; goto _test_eof; _test_eof188: cs = 188; goto _test_eof; _test_eof189: cs = 189; goto _test_eof; _test_eof190: cs = 190; goto _test_eof; _test_eof191: cs = 191; goto _test_eof; _test_eof192: cs = 192; goto _test_eof; _test_eof193: cs = 193; goto _test_eof; _test_eof194: cs = 194; goto _test_eof; _test_eof195: cs = 195; goto _test_eof; _test_eof208: cs = 208; goto _test_eof; _test_eof209: cs = 209; goto _test_eof; _test_eof196: cs = 196; goto _test_eof; _test_eof210: cs = 210; goto _test_eof; _test_eof211: cs = 211; goto _test_eof; _test_eof197: cs = 197; goto _test_eof; _test_eof212: cs = 212; goto _test_eof; _test_eof213: cs = 213; goto _test_eof; _test_eof: {} if ( p == eof ) { switch ( cs ) { case 199: goto tr400; case 0: goto tr0; case 1: goto tr0; case 2: goto tr0; case 3: goto tr0; case 4: goto tr0; case 5: goto tr0; case 6: goto tr0; case 7: goto tr0; case 8: goto tr0; case 9: goto tr0; case 10: goto tr0; case 11: goto tr0; case 12: goto tr0; case 13: goto tr0; case 14: goto tr0; case 15: goto tr0; case 16: goto tr0; 
case 17: goto tr0; case 18: goto tr0; case 19: goto tr0; case 20: goto tr0; case 21: goto tr0; case 22: goto tr0; case 23: goto tr0; case 24: goto tr39; case 25: goto tr39; case 26: goto tr39; case 27: goto tr39; case 28: goto tr0; case 29: goto tr39; case 30: goto tr0; case 31: goto tr0; case 32: goto tr0; case 33: goto tr0; case 34: goto tr0; case 35: goto tr0; case 36: goto tr0; case 37: goto tr0; case 38: goto tr0; case 39: goto tr0; case 200: goto tr405; case 40: goto tr0; case 41: goto tr0; case 201: goto tr405; case 42: goto tr0; case 43: goto tr0; case 202: goto tr405; case 44: goto tr0; case 45: goto tr0; case 46: goto tr0; case 47: goto tr0; case 48: goto tr0; case 49: goto tr0; case 50: goto tr0; case 51: goto tr0; case 52: goto tr0; case 53: goto tr0; case 54: goto tr0; case 55: goto tr0; case 56: goto tr0; case 57: goto tr0; case 58: goto tr0; case 59: goto tr0; case 60: goto tr0; case 61: goto tr0; case 62: goto tr0; case 63: goto tr0; case 64: goto tr39; case 65: goto tr39; case 66: goto tr39; case 67: goto tr39; case 68: goto tr39; case 69: goto tr39; case 70: goto tr39; case 71: goto tr39; case 72: goto tr39; case 73: goto tr39; case 74: goto tr39; case 75: goto tr39; case 76: goto tr39; case 77: goto tr39; case 78: goto tr39; case 79: goto tr39; case 80: goto tr39; case 81: goto tr39; case 82: goto tr39; case 83: goto tr39; case 203: goto tr39; case 84: goto tr39; case 85: goto tr39; case 86: goto tr39; case 87: goto tr39; case 88: goto tr39; case 89: goto tr39; case 90: goto tr39; case 91: goto tr39; case 92: goto tr39; case 93: goto tr39; case 94: goto tr39; case 95: goto tr39; case 96: goto tr39; case 97: goto tr39; case 98: goto tr39; case 99: goto tr39; case 100: goto tr39; case 101: goto tr39; case 102: goto tr39; case 204: goto tr39; case 103: goto tr39; case 104: goto tr39; case 105: goto tr39; case 106: goto tr39; case 107: goto tr39; case 108: goto tr39; case 109: goto tr39; case 110: goto tr39; case 111: goto tr39; case 112: goto tr39; 
case 113: goto tr39; case 114: goto tr39; case 115: goto tr39; case 116: goto tr39; case 117: goto tr39; case 205: goto tr39; case 118: goto tr39; case 119: goto tr39; case 120: goto tr39; case 121: goto tr39; case 122: goto tr39; case 123: goto tr39; case 124: goto tr39; case 125: goto tr39; case 126: goto tr39; case 127: goto tr39; case 128: goto tr39; case 129: goto tr39; case 130: goto tr39; case 131: goto tr39; case 132: goto tr39; case 133: goto tr39; case 134: goto tr39; case 135: goto tr39; case 136: goto tr39; case 137: goto tr39; case 138: goto tr39; case 139: goto tr0; case 140: goto tr0; case 206: goto tr406; case 141: goto tr0; case 142: goto tr0; case 143: goto tr0; case 207: goto tr406; case 144: goto tr335; case 145: goto tr335; case 146: goto tr335; case 147: goto tr335; case 148: goto tr335; case 149: goto tr335; case 150: goto tr335; case 151: goto tr335; case 152: goto tr335; case 153: goto tr335; case 154: goto tr335; case 155: goto tr335; case 156: goto tr335; case 157: goto tr335; case 158: goto tr335; case 159: goto tr335; case 160: goto tr335; case 161: goto tr335; case 162: goto tr335; case 163: goto tr335; case 164: goto tr335; case 165: goto tr335; case 166: goto tr335; case 167: goto tr335; case 168: goto tr335; case 169: goto tr335; case 170: goto tr335; case 171: goto tr335; case 172: goto tr335; case 173: goto tr335; case 174: goto tr335; case 175: goto tr335; case 176: goto tr335; case 177: goto tr335; case 178: goto tr335; case 179: goto tr335; case 180: goto tr335; case 181: goto tr335; case 182: goto tr335; case 183: goto tr335; case 184: goto tr335; case 185: goto tr335; case 186: goto tr335; case 187: goto tr335; case 188: goto tr335; case 189: goto tr335; case 190: goto tr335; case 191: goto tr335; case 192: goto tr335; case 193: goto tr335; case 194: goto tr335; case 195: goto tr335; case 209: goto tr411; case 196: goto tr393; case 211: goto tr416; case 197: goto tr395; case 213: goto tr422; } } } #line 602 "hpricot_scan.rl" 
if (cs == hpricot_scan_error) { if (buf != NULL) free(buf); if (!NIL_P(tag)) { rb_raise(rb_eHpricotParseError, "parse error on element <%s>, starting on line %d.\n" NO_WAY_SERIOUSLY, RSTRING_PTR(tag), curline); } else { rb_raise(rb_eHpricotParseError, "parse error on line %d.\n" NO_WAY_SERIOUSLY, curline); } } if (done && ele_open) { ele_open = 0; if (ts > 0) { mark_tag = ts; ts = 0; text = 1; } } if (ts == 0) { have = 0; /* text nodes have no ts because each byte is parsed alone */ if (mark_tag != NULL && text == 1) { if (done) { if (mark_tag < p-1) { CAT(tag, p-1); ELE(text); } } else { CAT(tag, p); } } if (io) mark_tag = buf; else mark_tag = RSTRING_PTR(port); } else if (io) { have = pe - ts; memmove(buf, ts, have); SLIDE(tag); SLIDE(akey); SLIDE(aval); te = buf + (te - ts); ts = buf; } } if (buf != NULL) free(buf); if (S != NULL) { VALUE doc = S->doc; rb_gc_unregister_address(&S->doc); free(S); return doc; } return Qnil; } void hstruct_mark(void* ptr) { struct hpricot_struct* st = (struct hpricot_struct*)ptr; int i; /* it's likely to hit GC when allocating st->ptr. * that should be checked to avoid segfault. * and simply ignore it. 
*/ if (st->ptr) { for(i = 0; i < st->len; i++) { rb_gc_mark(st->ptr[i]); } } } void hstruct_free(void* ptr) { struct hpricot_struct* st = (struct hpricot_struct*)ptr; free(st->ptr); free(st); } static VALUE alloc_hpricot_struct8(VALUE klass) { VALUE obj; struct hpricot_struct* st; obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st); st->len = 8; st->ptr = ALLOC_N(VALUE, 8); rb_mem_clear(st->ptr, 8); return obj; } static VALUE alloc_hpricot_struct2(VALUE klass) { VALUE obj; struct hpricot_struct* st; obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st); st->len = 2; st->ptr = ALLOC_N(VALUE, 2); rb_mem_clear(st->ptr, 2); return obj; } static VALUE alloc_hpricot_struct3(VALUE klass) { VALUE obj; struct hpricot_struct* st; obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st); st->len = 3; st->ptr = ALLOC_N(VALUE, 3); rb_mem_clear(st->ptr, 3); return obj; } static VALUE hpricot_struct_ref0(VALUE obj) {return H_ELE_GET(obj, 0);} static VALUE hpricot_struct_ref1(VALUE obj) {return H_ELE_GET(obj, 1);} static VALUE hpricot_struct_ref2(VALUE obj) {return H_ELE_GET(obj, 2);} static VALUE hpricot_struct_ref3(VALUE obj) {return H_ELE_GET(obj, 3);} static VALUE hpricot_struct_ref4(VALUE obj) {return H_ELE_GET(obj, 4);} static VALUE hpricot_struct_ref5(VALUE obj) {return H_ELE_GET(obj, 5);} static VALUE hpricot_struct_ref6(VALUE obj) {return H_ELE_GET(obj, 6);} static VALUE hpricot_struct_ref7(VALUE obj) {return H_ELE_GET(obj, 7);} static VALUE hpricot_struct_ref8(VALUE obj) {return H_ELE_GET(obj, 8);} static VALUE hpricot_struct_ref9(VALUE obj) {return H_ELE_GET(obj, 9);} static VALUE (*ref_func[10])() = { hpricot_struct_ref0, hpricot_struct_ref1, hpricot_struct_ref2, hpricot_struct_ref3, hpricot_struct_ref4, hpricot_struct_ref5, hpricot_struct_ref6, hpricot_struct_ref7, hpricot_struct_ref8, hpricot_struct_ref9, }; static VALUE hpricot_struct_set0(VALUE obj, VALUE val) {return 
H_ELE_SET(obj, 0, val);} static VALUE hpricot_struct_set1(VALUE obj, VALUE val) {return H_ELE_SET(obj, 1, val);} static VALUE hpricot_struct_set2(VALUE obj, VALUE val) {return H_ELE_SET(obj, 2, val);} static VALUE hpricot_struct_set3(VALUE obj, VALUE val) {return H_ELE_SET(obj, 3, val);} static VALUE hpricot_struct_set4(VALUE obj, VALUE val) {return H_ELE_SET(obj, 4, val);} static VALUE hpricot_struct_set5(VALUE obj, VALUE val) {return H_ELE_SET(obj, 5, val);} static VALUE hpricot_struct_set6(VALUE obj, VALUE val) {return H_ELE_SET(obj, 6, val);} static VALUE hpricot_struct_set7(VALUE obj, VALUE val) {return H_ELE_SET(obj, 7, val);} static VALUE hpricot_struct_set8(VALUE obj, VALUE val) {return H_ELE_SET(obj, 8, val);} static VALUE hpricot_struct_set9(VALUE obj, VALUE val) {return H_ELE_SET(obj, 9, val);} static VALUE (*set_func[10])() = { hpricot_struct_set0, hpricot_struct_set1, hpricot_struct_set2, hpricot_struct_set3, hpricot_struct_set4, hpricot_struct_set5, hpricot_struct_set6, hpricot_struct_set7, hpricot_struct_set8, hpricot_struct_set9, }; static VALUE make_hpricot_struct(VALUE members, VALUE (*alloc)(VALUE klass)) { int i = 0; char attr_set[128]; VALUE klass = rb_class_new(rb_cObject); rb_define_alloc_func(klass, alloc); int len = RARRAY_LEN(members); assert(len < 10); for (i = 0; i < len; i++) { ID id = SYM2ID(rb_ary_entry(members, i)); const char* name = rb_id2name(id); int len = strlen(name); memcpy(attr_set, name, strlen(name)); attr_set[len] = '='; attr_set[len+1] = 0; rb_define_method(klass, name, ref_func[i], 0); rb_define_method(klass, attr_set, set_func[i], 1); } return klass; } void Init_hpricot_scan() { VALUE structElem, structAttr, structBasic; s_ElementContent = rb_intern("ElementContent"); symAllow = ID2SYM(rb_intern("allow")); symDeny = ID2SYM(rb_intern("deny")); s_downcase = rb_intern("downcase"); s_new = rb_intern("new"); s_parent = rb_intern("parent"); s_read = rb_intern("read"); s_to_str = rb_intern("to_str"); sym_xmldecl = 
ID2SYM(rb_intern("xmldecl"));
  /* Remaining symbols cached in file-level variables. */
  sym_doctype = ID2SYM(rb_intern("doctype"));
  sym_procins = ID2SYM(rb_intern("procins"));
  sym_stag = ID2SYM(rb_intern("stag"));
  sym_etag = ID2SYM(rb_intern("etag"));
  sym_emptytag = ID2SYM(rb_intern("emptytag"));
  sym_allowed = ID2SYM(rb_intern("allowed"));
  sym_children = ID2SYM(rb_intern("children"));
  sym_comment = ID2SYM(rb_intern("comment"));
  sym_cdata = ID2SYM(rb_intern("cdata"));
  sym_name = ID2SYM(rb_intern("name"));
  sym_parent = ID2SYM(rb_intern("parent"));
  sym_raw_attributes = ID2SYM(rb_intern("raw_attributes"));
  sym_raw_string = ID2SYM(rb_intern("raw_string"));
  sym_tagno = ID2SYM(rb_intern("tagno"));
  sym_text = ID2SYM(rb_intern("text"));
  sym_EMPTY = ID2SYM(rb_intern("EMPTY"));
  sym_CDATA = ID2SYM(rb_intern("CDATA"));

  /*
   * The Hpricot module: a readable and writable `buffer_size` attribute on
   * the module itself, plus the singleton methods `scan` (variable
   * argument count) and `css` (three arguments).
   */
  mHpricot = rb_define_module("Hpricot");
  rb_define_attr(rb_singleton_class(mHpricot), "buffer_size", 1, 1);
  rb_define_singleton_method(mHpricot, "scan", hpricot_scan, -1);
  rb_define_singleton_method(mHpricot, "css", hpricot_css, 3);
  rb_eHpricotParseError = rb_define_class_under(mHpricot, "ParseError", rb_eStandardError);

  /*
   * Three anonymous base classes built by make_hpricot_struct. The member
   * order fixes the slot index of each accessor, and the allocator passed
   * alongside has a matching slot count (8, 3 and 2).
   */
  structElem = make_hpricot_struct(rb_ary_new3(8, sym_name, sym_parent,
    sym_raw_attributes, sym_etag, sym_raw_string, sym_allowed,
    sym_tagno, sym_children), alloc_hpricot_struct8);
  structAttr = make_hpricot_struct(
    rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes),
    alloc_hpricot_struct3);
  structBasic = make_hpricot_struct(
    rb_ary_new3(2, sym_name, sym_parent),
    alloc_hpricot_struct2);

  /* Hpricot::Doc uses the 8-slot layout and adds no methods here. */
  cDoc = rb_define_class_under(mHpricot, "Doc", structElem);

  /* Hpricot::CData: 2-slot layout; `content` is bound to the name accessors. */
  cCData = rb_define_class_under(mHpricot, "CData", structBasic);
  rb_define_method(cCData, "content", hpricot_ele_get_name, 0);
  rb_define_method(cCData, "content=", hpricot_ele_set_name, 1);

  /* Hpricot::Comment: 2-slot layout; `content` is bound to the name accessors. */
  cComment = rb_define_class_under(mHpricot, "Comment", structBasic);
  rb_define_method(cComment, "content", hpricot_ele_get_name, 0);
  rb_define_method(cComment, "content=", hpricot_ele_set_name, 1);

  /*
   * Hpricot::DocType: 3-slot layout; `raw_string`/`clear_raw` are bound to
   * the name accessors, and target/public_id/system_id have their own
   * getter and setter functions.
   */
  cDocType = rb_define_class_under(mHpricot, "DocType", structAttr);
  rb_define_method(cDocType, "raw_string", hpricot_ele_get_name, 0);
  rb_define_method(cDocType, "clear_raw", hpricot_ele_clear_name, 0);
  rb_define_method(cDocType, "target", hpricot_ele_get_target, 0);
  rb_define_method(cDocType, "target=", hpricot_ele_set_target, 1);
  rb_define_method(cDocType, "public_id", hpricot_ele_get_public_id, 0);
  rb_define_method(cDocType, "public_id=", hpricot_ele_set_public_id, 1);
  rb_define_method(cDocType, "system_id", hpricot_ele_get_system_id, 0);
  rb_define_method(cDocType, "system_id=", hpricot_ele_set_system_id, 1);

  /* Hpricot::Elem: 8-slot layout plus `clear_raw`. */
  cElem = rb_define_class_under(mHpricot, "Elem", structElem);
  rb_define_method(cElem, "clear_raw", hpricot_ele_clear_raw, 0);

  /*
   * Hpricot::BogusETag: 3-slot layout; `raw_string`/`clear_raw` are bound
   * to the attr accessors.
   */
  cBogusETag = rb_define_class_under(mHpricot, "BogusETag", structAttr);
  rb_define_method(cBogusETag, "raw_string", hpricot_ele_get_attr, 0);
  rb_define_method(cBogusETag, "clear_raw", hpricot_ele_clear_attr, 0);

  /*
   * Hpricot::Text: 2-slot layout; `raw_string` and `content` are both bound
   * to the name getter.
   */
  cText = rb_define_class_under(mHpricot, "Text", structBasic);
  rb_define_method(cText, "raw_string", hpricot_ele_get_name, 0);
  rb_define_method(cText, "clear_raw", hpricot_ele_clear_name, 0);
  rb_define_method(cText, "content", hpricot_ele_get_name, 0);
  rb_define_method(cText, "content=", hpricot_ele_set_name, 1);

  /*
   * Hpricot::XMLDecl: 3-slot layout; `raw_string`/`clear_raw` are bound to
   * the name accessors, and encoding/standalone/version have their own
   * getter and setter functions.
   */
  cXMLDecl = rb_define_class_under(mHpricot, "XMLDecl", structAttr);
  rb_define_method(cXMLDecl, "raw_string", hpricot_ele_get_name, 0);
  rb_define_method(cXMLDecl, "clear_raw", hpricot_ele_clear_name, 0);
  rb_define_method(cXMLDecl, "encoding", hpricot_ele_get_encoding, 0);
  rb_define_method(cXMLDecl, "encoding=", hpricot_ele_set_encoding, 1);
  rb_define_method(cXMLDecl, "standalone", hpricot_ele_get_standalone, 0);
  rb_define_method(cXMLDecl, "standalone=", hpricot_ele_set_standalone, 1);
  rb_define_method(cXMLDecl, "version", hpricot_ele_get_version, 0);
  rb_define_method(cXMLDecl, "version=", hpricot_ele_set_version, 1);

  /* Hpricot::ProcIns: 3-slot layout; `target` is bound to the name accessors. */
  cProcIns = rb_define_class_under(mHpricot, "ProcIns", structAttr);
  rb_define_method(cProcIns, "target", hpricot_ele_get_name, 0);
  rb_define_method(cProcIns, "target=",
hpricot_ele_set_name, 1); rb_define_method(cProcIns, "content", hpricot_ele_get_attr, 0); rb_define_method(cProcIns, "content=", hpricot_ele_set_attr, 1); rb_const_set(mHpricot, rb_intern("ProcInsParse"), reProcInsParse = rb_eval_string("/\\A<\\?(\\S+)\\s+(.+)/m")); } hpricot-0.8.6/ext/hpricot_scan/hpricot_css.c0000644000175000017500000017156411710073440020526 0ustar boutilboutil #line 1 "hpricot_css.rl" /* * hpricot_css.rl * ragel -C hpricot_css.rl -o hpricot_css.c * * Copyright (C) 2008 why the lucky stiff */ #include #define FILTER(id) \ rb_funcall2(mod, rb_intern("" # id), fargs, fvals); \ rb_ary_clear(tmpt); \ fargs = 1 #define FILTERAUTO() \ char filt[10]; \ sprintf(filt, "%.*s", te - ts, ts); \ rb_funcall2(mod, rb_intern(filt), fargs, fvals); \ rb_ary_clear(tmpt); \ fargs = 1 #ifdef HAVE_RUBY_ENCODING_H #define STRNEW(a, len) rb_external_str_new((a), (len)) #else #define STRNEW(a, len) rb_str_new((a), (len)) #endif #define PUSH(aps, ape) rb_ary_push(tmpt, fvals[fargs++] = STRNEW(aps, ape - aps)) #define P(id) printf(id ": %.*s\n", te - ts, ts); #line 31 "hpricot_css.c" static const int hpricot_css_start = 87; static const int hpricot_css_error = 0; static const int hpricot_css_en_main = 87; #line 92 "hpricot_css.rl" VALUE hpricot_css(VALUE self, VALUE mod, VALUE str, VALUE node) { int cs, act, eof; char *p, *pe, *ts, *te, *aps, *ape, *aps2, *ape2; int fargs = 1; VALUE fvals[6]; VALUE focus = rb_ary_new3(1, node); VALUE tmpt = rb_ary_new(); rb_gc_register_address(&focus); rb_gc_register_address(&tmpt); fvals[0] = focus; if (TYPE(str) != T_STRING) rb_raise(rb_eArgError, "bad CSS selector, String only please."); StringValue(str); p = RSTRING_PTR(str); pe = p + RSTRING_LEN(str); #line 62 "hpricot_css.c" { cs = hpricot_css_start; ts = 0; te = 0; act = 0; } #line 115 "hpricot_css.rl" #line 72 "hpricot_css.c" { if ( p == pe ) goto _test_eof; switch ( cs ) { tr0: #line 1 "NONE" { switch( act ) { case 0: {{goto st0;}} break; case 1: {{p = ((te))-1;} FILTER(ID); } 
break; case 2: {{p = ((te))-1;} FILTER(CLASS); } break; case 5: {{p = ((te))-1;} FILTER(TAG); } break; case 7: {{p = ((te))-1;} FILTER(CHILD); } break; case 8: {{p = ((te))-1;} FILTER(POS); } break; case 9: {{p = ((te))-1;} FILTER(PSUEDO); } break; } } goto st87; tr4: #line 88 "hpricot_css.rl" {{p = ((te))-1;}} goto st87; tr41: #line 85 "hpricot_css.rl" {{p = ((te))-1;}{ FILTER(PSUEDO); }} goto st87; tr46: #line 30 "hpricot_css.rl" { aps = p; } #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 85 "hpricot_css.rl" {te = p+1;{ FILTER(PSUEDO); }} goto st87; tr48: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 85 "hpricot_css.rl" {te = p+1;{ FILTER(PSUEDO); }} goto st87; tr62: #line 84 "hpricot_css.rl" {{p = ((te))-1;}{ FILTER(POS); }} goto st87; tr64: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 84 "hpricot_css.rl" {te = p+1;{ FILTER(POS); }} goto st87; tr66: #line 83 "hpricot_css.rl" {{p = ((te))-1;}{ FILTER(CHILD); }} goto st87; tr67: #line 30 "hpricot_css.rl" { aps = p; } #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 83 "hpricot_css.rl" {te = p+1;{ FILTER(CHILD); }} goto st87; tr71: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 83 "hpricot_css.rl" {te = p+1;{ FILTER(CHILD); }} goto st87; tr100: #line 80 "hpricot_css.rl" {te = p+1;{ FILTER(ATTR); }} goto st87; tr105: #line 80 "hpricot_css.rl" {{p = ((te))-1;}{ FILTER(ATTR); }} goto st87; tr132: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 79 "hpricot_css.rl" {te = p+1;{ FILTER(NAME); }} goto st87; tr143: #line 87 "hpricot_css.rl" {te = p+1;{ FILTERAUTO(); }} goto st87; tr149: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 81 "hpricot_css.rl" {te = p;p--;{ FILTER(TAG); }} goto st87; tr153: #line 88 "hpricot_css.rl" {te = p;p--;} goto st87; tr154: #line 86 "hpricot_css.rl" {te = p;p--;{ focus = rb_ary_new3(1, node); }} goto st87; tr155: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 77 "hpricot_css.rl" 
{te = p;p--;{ FILTER(ID); }} goto st87; tr159: #line 82 "hpricot_css.rl" {te = p;p--;{ FILTER(MOD); }} goto st87; tr162: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 78 "hpricot_css.rl" {te = p;p--;{ FILTER(CLASS); }} goto st87; tr166: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 85 "hpricot_css.rl" {te = p;p--;{ FILTER(PSUEDO); }} goto st87; tr173: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 84 "hpricot_css.rl" {te = p;p--;{ FILTER(POS); }} goto st87; tr192: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 83 "hpricot_css.rl" {te = p;p--;{ FILTER(CHILD); }} goto st87; tr200: #line 80 "hpricot_css.rl" {te = p;p--;{ FILTER(ATTR); }} goto st87; st87: #line 1 "NONE" {ts = 0;} #line 1 "NONE" {act = 0;} if ( ++p == pe ) goto _test_eof87; case 87: #line 1 "NONE" {ts = p;} #line 275 "hpricot_css.c" switch( (*p) ) { case -60: goto tr133; case 32: goto tr137; case 35: goto st7; case 43: goto st92; case 44: goto st90; case 45: goto tr140; case 46: goto st13; case 58: goto st19; case 62: goto tr143; case 91: goto st52; case 92: goto tr146; case 95: goto tr144; case 101: goto tr147; case 110: goto tr140; case 111: goto tr148; case 126: goto tr143; } if ( (*p) < 9 ) { if ( (*p) < -32 ) { if ( -59 <= (*p) && (*p) <= -33 ) goto tr134; } else if ( (*p) > -17 ) { if ( -16 <= (*p) && (*p) <= -12 ) goto tr136; } else goto tr135; } else if ( (*p) > 13 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr140; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr144; } else goto tr144; } else goto tr137; goto st0; st0: cs = 0; goto _out; tr133: #line 30 "hpricot_css.rl" { aps = p; } goto st1; st1: if ( ++p == pe ) goto _test_eof1; case 1: #line 328 "hpricot_css.c" if ( -88 <= (*p) && (*p) <= -65 ) goto tr1; goto tr0; tr1: #line 1 "NONE" {te = p+1;} #line 81 "hpricot_css.rl" {act = 5;} goto st88; tr144: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 81 "hpricot_css.rl" {act = 5;} 
goto st88; st88: if ( ++p == pe ) goto _test_eof88; case 88: #line 352 "hpricot_css.c" switch( (*p) ) { case -60: goto st1; case 45: goto tr1; case 92: goto st5; case 95: goto tr1; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st3; } else if ( (*p) >= -59 ) goto st2; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr1; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr1; } else goto tr1; } else goto st4; goto tr149; tr134: #line 30 "hpricot_css.rl" { aps = p; } goto st2; st2: if ( ++p == pe ) goto _test_eof2; case 2: #line 387 "hpricot_css.c" if ( (*p) <= -65 ) goto tr1; goto tr0; tr135: #line 30 "hpricot_css.rl" { aps = p; } goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: #line 401 "hpricot_css.c" if ( (*p) <= -65 ) goto st2; goto tr0; tr136: #line 30 "hpricot_css.rl" { aps = p; } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: #line 415 "hpricot_css.c" if ( (*p) <= -65 ) goto st3; goto tr0; tr146: #line 30 "hpricot_css.rl" { aps = p; } goto st5; st5: if ( ++p == pe ) goto _test_eof5; case 5: #line 429 "hpricot_css.c" if ( (*p) == 46 ) goto tr1; goto tr0; tr137: #line 1 "NONE" {te = p+1;} goto st89; st89: if ( ++p == pe ) goto _test_eof89; case 89: #line 441 "hpricot_css.c" switch( (*p) ) { case 32: goto st6; case 44: goto st90; } if ( 9 <= (*p) && (*p) <= 13 ) goto st6; goto tr153; st6: if ( ++p == pe ) goto _test_eof6; case 6: switch( (*p) ) { case 32: goto st6; case 44: goto st90; } if ( 9 <= (*p) && (*p) <= 13 ) goto st6; goto tr4; st90: if ( ++p == pe ) goto _test_eof90; case 90: if ( (*p) == 32 ) goto st90; if ( 9 <= (*p) && (*p) <= 13 ) goto st90; goto tr154; st7: if ( ++p == pe ) goto _test_eof7; case 7: switch( (*p) ) { case -60: goto tr7; case 45: goto tr12; case 92: goto tr13; case 95: goto tr12; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto tr10; } else if ( (*p) >= -59 ) goto tr9; } else if ( (*p) > -12 ) { if ( 
(*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr12; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr12; } else goto tr12; } else goto tr11; goto st0; tr7: #line 30 "hpricot_css.rl" { aps = p; } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: #line 507 "hpricot_css.c" if ( -88 <= (*p) && (*p) <= -65 ) goto tr14; goto tr0; tr12: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 77 "hpricot_css.rl" {act = 1;} goto st91; tr14: #line 1 "NONE" {te = p+1;} #line 77 "hpricot_css.rl" {act = 1;} goto st91; st91: if ( ++p == pe ) goto _test_eof91; case 91: #line 531 "hpricot_css.c" switch( (*p) ) { case -60: goto st8; case 45: goto tr14; case 92: goto st12; case 95: goto tr14; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st10; } else if ( (*p) >= -59 ) goto st9; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr14; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr14; } else goto tr14; } else goto st11; goto tr155; tr9: #line 30 "hpricot_css.rl" { aps = p; } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: #line 566 "hpricot_css.c" if ( (*p) <= -65 ) goto tr14; goto tr0; tr10: #line 30 "hpricot_css.rl" { aps = p; } goto st10; st10: if ( ++p == pe ) goto _test_eof10; case 10: #line 580 "hpricot_css.c" if ( (*p) <= -65 ) goto st9; goto tr0; tr11: #line 30 "hpricot_css.rl" { aps = p; } goto st11; st11: if ( ++p == pe ) goto _test_eof11; case 11: #line 594 "hpricot_css.c" if ( (*p) <= -65 ) goto st10; goto tr0; tr13: #line 30 "hpricot_css.rl" { aps = p; } goto st12; st12: if ( ++p == pe ) goto _test_eof12; case 12: #line 608 "hpricot_css.c" if ( (*p) == 46 ) goto tr14; goto tr0; tr160: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st92; st92: if ( ++p == pe ) goto _test_eof92; case 92: #line 623 "hpricot_css.c" switch( (*p) ) { case 43: goto st92; case 45: goto st92; case 110: goto st92; } if ( 48 <= (*p) && (*p) <= 57 
) goto st92; goto tr159; tr161: #line 1 "NONE" {te = p+1;} #line 81 "hpricot_css.rl" {act = 5;} goto st93; tr140: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 81 "hpricot_css.rl" {act = 5;} goto st93; st93: if ( ++p == pe ) goto _test_eof93; case 93: #line 652 "hpricot_css.c" switch( (*p) ) { case -60: goto st1; case 43: goto tr160; case 45: goto tr161; case 92: goto st5; case 95: goto tr1; case 110: goto tr161; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st3; } else if ( (*p) >= -59 ) goto st2; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr161; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr1; } else goto tr1; } else goto st4; goto tr149; st13: if ( ++p == pe ) goto _test_eof13; case 13: switch( (*p) ) { case -60: goto tr17; case 45: goto tr21; case 92: goto tr22; case 95: goto tr21; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto tr19; } else if ( (*p) >= -59 ) goto tr18; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr21; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr21; } else goto tr21; } else goto tr20; goto st0; tr17: #line 30 "hpricot_css.rl" { aps = p; } goto st14; st14: if ( ++p == pe ) goto _test_eof14; case 14: #line 717 "hpricot_css.c" if ( -88 <= (*p) && (*p) <= -65 ) goto tr23; goto tr0; tr21: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 78 "hpricot_css.rl" {act = 2;} goto st94; tr23: #line 1 "NONE" {te = p+1;} #line 78 "hpricot_css.rl" {act = 2;} goto st94; st94: if ( ++p == pe ) goto _test_eof94; case 94: #line 741 "hpricot_css.c" switch( (*p) ) { case -60: goto st14; case 45: goto tr23; case 92: goto st18; case 95: goto tr23; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st16; } else if ( (*p) >= -59 ) goto st15; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && 
(*p) <= 57 ) goto tr23; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr23; } else goto tr23; } else goto st17; goto tr162; tr18: #line 30 "hpricot_css.rl" { aps = p; } goto st15; st15: if ( ++p == pe ) goto _test_eof15; case 15: #line 776 "hpricot_css.c" if ( (*p) <= -65 ) goto tr23; goto tr0; tr19: #line 30 "hpricot_css.rl" { aps = p; } goto st16; st16: if ( ++p == pe ) goto _test_eof16; case 16: #line 790 "hpricot_css.c" if ( (*p) <= -65 ) goto st15; goto tr0; tr20: #line 30 "hpricot_css.rl" { aps = p; } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: #line 804 "hpricot_css.c" if ( (*p) <= -65 ) goto st16; goto tr0; tr22: #line 30 "hpricot_css.rl" { aps = p; } goto st18; st18: if ( ++p == pe ) goto _test_eof18; case 18: #line 818 "hpricot_css.c" if ( (*p) == 46 ) goto tr23; goto tr0; st19: if ( ++p == pe ) goto _test_eof19; case 19: switch( (*p) ) { case -60: goto tr26; case 45: goto tr30; case 92: goto tr31; case 95: goto tr30; case 101: goto tr32; case 102: goto tr33; case 103: goto tr34; case 108: goto tr35; case 110: goto tr36; case 111: goto tr37; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto tr28; } else if ( (*p) >= -59 ) goto tr27; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr30; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr30; } else goto tr30; } else goto tr29; goto st0; tr26: #line 30 "hpricot_css.rl" { aps = p; } goto st20; tr174: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st20; st20: if ( ++p == pe ) goto _test_eof20; case 20: #line 873 "hpricot_css.c" if ( -88 <= (*p) && (*p) <= -65 ) goto tr38; goto tr0; tr30: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 85 "hpricot_css.rl" {act = 9;} goto st95; tr38: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st95; tr179: #line 1 "NONE" {te = p+1;} #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 85 
"hpricot_css.rl" {act = 9;} goto st95; st95: if ( ++p == pe ) goto _test_eof95; case 95: #line 908 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr27: #line 30 "hpricot_css.rl" { aps = p; } goto st21; tr175: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st21; st21: if ( ++p == pe ) goto _test_eof21; case 21: #line 951 "hpricot_css.c" if ( (*p) <= -65 ) goto tr38; goto tr0; tr28: #line 30 "hpricot_css.rl" { aps = p; } goto st22; tr176: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st22; st22: if ( ++p == pe ) goto _test_eof22; case 22: #line 972 "hpricot_css.c" if ( (*p) <= -65 ) goto st21; goto tr0; tr29: #line 30 "hpricot_css.rl" { aps = p; } goto st23; tr177: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st23; st23: if ( ++p == pe ) goto _test_eof23; case 23: #line 993 "hpricot_css.c" if ( (*p) <= -65 ) goto st22; goto tr0; tr169: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st24; st24: if ( ++p == pe ) goto _test_eof24; case 24: #line 1008 "hpricot_css.c" switch( (*p) ) { case 34: goto tr43; case 39: goto tr44; case 40: goto tr45; case 41: goto tr46; } goto tr42; tr42: #line 30 "hpricot_css.rl" { aps = p; } goto st25; st25: if ( ++p == pe ) goto _test_eof25; case 25: #line 1026 "hpricot_css.c" switch( (*p) ) { case 34: goto tr0; case 40: goto tr0; case 41: goto tr48; } goto st25; tr43: #line 30 "hpricot_css.rl" { aps = p; } goto st26; st26: if ( ++p == pe ) goto _test_eof26; case 26: #line 1043 "hpricot_css.c" switch( (*p) ) { case 34: goto st28; case 40: goto st29; case 41: goto tr0; } goto 
st27; st27: if ( ++p == pe ) goto _test_eof27; case 27: if ( (*p) == 34 ) goto st28; if ( 40 <= (*p) && (*p) <= 41 ) goto tr0; goto st27; st28: if ( ++p == pe ) goto _test_eof28; case 28: if ( (*p) == 41 ) goto tr48; goto tr0; st29: if ( ++p == pe ) goto _test_eof29; case 29: if ( (*p) == 41 ) goto tr0; goto st30; st30: if ( ++p == pe ) goto _test_eof30; case 30: if ( (*p) == 41 ) goto st31; goto st30; st31: if ( ++p == pe ) goto _test_eof31; case 31: switch( (*p) ) { case 34: goto st28; case 40: goto st29; } goto tr0; tr44: #line 30 "hpricot_css.rl" { aps = p; } goto st32; st32: if ( ++p == pe ) goto _test_eof32; case 32: #line 1099 "hpricot_css.c" switch( (*p) ) { case 34: goto st34; case 39: goto st25; case 40: goto st35; case 41: goto tr48; } goto st33; st33: if ( ++p == pe ) goto _test_eof33; case 33: switch( (*p) ) { case 34: goto st34; case 39: goto st25; case 40: goto tr0; case 41: goto tr48; } goto st33; st34: if ( ++p == pe ) goto _test_eof34; case 34: if ( (*p) == 39 ) goto st28; if ( 40 <= (*p) && (*p) <= 41 ) goto tr0; goto st34; st35: if ( ++p == pe ) goto _test_eof35; case 35: if ( (*p) == 41 ) goto tr0; goto st36; st36: if ( ++p == pe ) goto _test_eof36; case 36: if ( (*p) == 41 ) goto st37; goto st36; st37: if ( ++p == pe ) goto _test_eof37; case 37: switch( (*p) ) { case 39: goto st28; case 40: goto st35; } goto tr0; tr45: #line 30 "hpricot_css.rl" { aps = p; } goto st38; st38: if ( ++p == pe ) goto _test_eof38; case 38: #line 1160 "hpricot_css.c" if ( (*p) == 41 ) goto tr0; goto st39; st39: if ( ++p == pe ) goto _test_eof39; case 39: if ( (*p) == 41 ) goto st40; goto st39; st40: if ( ++p == pe ) goto _test_eof40; case 40: switch( (*p) ) { case 40: goto st38; case 41: goto tr48; } goto tr0; tr31: #line 30 "hpricot_css.rl" { aps = p; } goto st41; tr180: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st41; st41: if ( ++p == pe ) goto _test_eof41; case 41: #line 1197 "hpricot_css.c" if ( (*p) == 46 ) goto tr38; goto tr0; tr32: #line 1 
"NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 85 "hpricot_css.rl" {act = 9;} goto st96; st96: if ( ++p == pe ) goto _test_eof96; case 96: #line 1215 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 113: goto tr171; case 118: goto tr172; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr171: #line 1 "NONE" {te = p+1;} #line 84 "hpricot_css.rl" {act = 8;} goto st97; st97: if ( ++p == pe ) goto _test_eof97; case 97: #line 1253 "hpricot_css.c" switch( (*p) ) { case -60: goto tr174; case 40: goto tr178; case 45: goto tr179; case 92: goto tr180; case 95: goto tr179; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto tr176; } else if ( (*p) >= -59 ) goto tr175; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr179; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr179; } else goto tr179; } else goto tr177; goto tr173; tr178: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st42; st42: if ( ++p == pe ) goto _test_eof42; case 42: #line 1290 "hpricot_css.c" switch( (*p) ) { case 34: goto tr43; case 39: goto tr44; case 40: goto tr45; case 41: goto tr46; } if ( 48 <= (*p) && (*p) <= 57 ) goto tr63; goto tr42; tr63: #line 30 "hpricot_css.rl" { aps = p; } goto st43; st43: if ( ++p == pe ) goto _test_eof43; case 43: #line 1310 "hpricot_css.c" switch( (*p) ) { case 34: goto tr62; case 40: goto tr62; case 41: goto tr64; } if ( 48 <= (*p) && (*p) <= 57 ) goto st43; goto st25; tr172: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st98; st98: if ( ++p == pe ) goto _test_eof98; case 98: 
#line 1329 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 101: goto tr181; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr181: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st99; st99: if ( ++p == pe ) goto _test_eof99; case 99: #line 1366 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 110: goto tr171; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr33: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 85 "hpricot_css.rl" {act = 9;} goto st100; st100: if ( ++p == pe ) goto _test_eof100; case 100: #line 1407 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 105: goto tr182; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr182: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st101; st101: if ( ++p == pe ) goto _test_eof101; case 101: #line 1444 "hpricot_css.c" switch( (*p) ) { case -60: goto 
st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 114: goto tr183; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr183: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st102; st102: if ( ++p == pe ) goto _test_eof102; case 102: #line 1481 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 115: goto tr184; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr184: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st103; st103: if ( ++p == pe ) goto _test_eof103; case 103: #line 1518 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 116: goto tr185; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr185: #line 1 "NONE" {te = p+1;} #line 84 "hpricot_css.rl" {act = 8;} goto st104; st104: if ( ++p == pe ) goto _test_eof104; case 104: #line 1555 "hpricot_css.c" switch( (*p) ) { case -60: goto tr174; case 40: goto tr178; case 45: goto tr186; case 92: goto tr180; case 95: goto tr179; } 
if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto tr176; } else if ( (*p) >= -59 ) goto tr175; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr179; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr179; } else goto tr179; } else goto tr177; goto tr173; tr199: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st105; tr186: #line 1 "NONE" {te = p+1;} #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } #line 85 "hpricot_css.rl" {act = 9;} goto st105; st105: if ( ++p == pe ) goto _test_eof105; case 105: #line 1602 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 99: goto tr187; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr187: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st106; st106: if ( ++p == pe ) goto _test_eof106; case 106: #line 1639 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 104: goto tr188; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr188: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st107; st107: if ( ++p == pe ) goto _test_eof107; case 107: #line 1676 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto 
st41; case 95: goto tr38; case 105: goto tr189; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr189: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st108; st108: if ( ++p == pe ) goto _test_eof108; case 108: #line 1713 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 108: goto tr190; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr190: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st109; st109: if ( ++p == pe ) goto _test_eof109; case 109: #line 1750 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 100: goto tr191; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr191: #line 1 "NONE" {te = p+1;} #line 83 "hpricot_css.rl" {act = 7;} goto st110; st110: if ( ++p == pe ) goto _test_eof110; case 110: #line 1787 "hpricot_css.c" switch( (*p) ) { case -60: goto tr174; case 40: goto tr193; case 45: goto tr179; case 92: goto tr180; case 95: goto tr179; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && 
(*p) <= -17 ) goto tr176; } else if ( (*p) >= -59 ) goto tr175; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr179; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr179; } else goto tr179; } else goto tr177; goto tr192; tr193: #line 34 "hpricot_css.rl" { ape = p; PUSH(aps, ape); } goto st44; st44: if ( ++p == pe ) goto _test_eof44; case 44: #line 1824 "hpricot_css.c" switch( (*p) ) { case 34: goto tr43; case 39: goto tr44; case 40: goto tr45; case 41: goto tr67; case 43: goto tr68; case 45: goto tr68; case 101: goto tr69; case 110: goto tr68; case 111: goto tr70; } if ( 48 <= (*p) && (*p) <= 57 ) goto tr68; goto tr42; tr68: #line 30 "hpricot_css.rl" { aps = p; } goto st45; st45: if ( ++p == pe ) goto _test_eof45; case 45: #line 1849 "hpricot_css.c" switch( (*p) ) { case 34: goto tr66; case 40: goto tr66; case 41: goto tr71; case 43: goto st45; case 45: goto st45; case 110: goto st45; } if ( 48 <= (*p) && (*p) <= 57 ) goto st45; goto st25; tr69: #line 30 "hpricot_css.rl" { aps = p; } goto st46; st46: if ( ++p == pe ) goto _test_eof46; case 46: #line 1871 "hpricot_css.c" switch( (*p) ) { case 34: goto tr66; case 40: goto tr66; case 41: goto tr48; case 118: goto st47; } goto st25; st47: if ( ++p == pe ) goto _test_eof47; case 47: switch( (*p) ) { case 34: goto tr66; case 40: goto tr66; case 41: goto tr48; case 101: goto st48; } goto st25; st48: if ( ++p == pe ) goto _test_eof48; case 48: switch( (*p) ) { case 34: goto tr66; case 40: goto tr66; case 41: goto tr48; case 110: goto st49; } goto st25; st49: if ( ++p == pe ) goto _test_eof49; case 49: switch( (*p) ) { case 34: goto tr66; case 40: goto tr66; case 41: goto tr71; } goto st25; tr70: #line 30 "hpricot_css.rl" { aps = p; } goto st50; st50: if ( ++p == pe ) goto _test_eof50; case 50: #line 1921 "hpricot_css.c" switch( (*p) ) { case 34: goto tr66; case 40: goto tr66; case 41: goto tr48; case 100: goto st51; } goto st25; st51: if ( ++p == pe ) goto _test_eof51; 
case 51: switch( (*p) ) { case 34: goto tr66; case 40: goto tr66; case 41: goto tr48; case 100: goto st49; } goto st25; tr34: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 85 "hpricot_css.rl" {act = 9;} goto st111; st111: if ( ++p == pe ) goto _test_eof111; case 111: #line 1954 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 116: goto tr171; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr35: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 85 "hpricot_css.rl" {act = 9;} goto st112; st112: if ( ++p == pe ) goto _test_eof112; case 112: #line 1995 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 97: goto tr183; case 116: goto tr171; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 98 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr36: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 85 "hpricot_css.rl" {act = 9;} goto st113; st113: if ( ++p == pe ) goto _test_eof113; case 113: #line 2037 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 116: goto tr194; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { 
if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr194: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st114; st114: if ( ++p == pe ) goto _test_eof114; case 114: #line 2074 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 104: goto tr185; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr37: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 85 "hpricot_css.rl" {act = 9;} goto st115; st115: if ( ++p == pe ) goto _test_eof115; case 115: #line 2115 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 100: goto tr195; case 110: goto tr196; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr195: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st116; st116: if ( ++p == pe ) goto _test_eof116; case 116: #line 2153 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 100: goto tr171; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) 
goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr196: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st117; st117: if ( ++p == pe ) goto _test_eof117; case 117: #line 2190 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 108: goto tr197; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr197: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st118; st118: if ( ++p == pe ) goto _test_eof118; case 118: #line 2227 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr38; case 92: goto st41; case 95: goto tr38; case 121: goto tr198; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto st23; goto tr166; tr198: #line 1 "NONE" {te = p+1;} #line 85 "hpricot_css.rl" {act = 9;} goto st119; st119: if ( ++p == pe ) goto _test_eof119; case 119: #line 2264 "hpricot_css.c" switch( (*p) ) { case -60: goto st20; case 40: goto tr169; case 45: goto tr199; case 92: goto st41; case 95: goto tr38; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st22; } else if ( (*p) >= -59 ) goto st21; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr38; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr38; } else goto tr38; } else goto 
st23; goto tr166; st52: if ( ++p == pe ) goto _test_eof52; case 52: switch( (*p) ) { case -60: goto tr77; case 45: goto tr81; case 92: goto tr82; case 95: goto tr81; case 110: goto tr83; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto tr79; } else if ( (*p) >= -59 ) goto tr78; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr81; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr81; } else goto tr81; } else goto tr80; goto st0; tr77: #line 30 "hpricot_css.rl" { aps = p; } goto st53; st53: if ( ++p == pe ) goto _test_eof53; case 53: #line 2329 "hpricot_css.c" if ( -88 <= (*p) && (*p) <= -65 ) goto st54; goto st0; tr81: #line 30 "hpricot_css.rl" { aps = p; } goto st54; tr91: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st54; st54: if ( ++p == pe ) goto _test_eof54; case 54: #line 2350 "hpricot_css.c" switch( (*p) ) { case -60: goto tr86; case 32: goto tr90; case 45: goto tr91; case 61: goto tr92; case 92: goto tr93; case 95: goto tr91; } if ( (*p) < 9 ) { if ( (*p) < -32 ) { if ( -59 <= (*p) && (*p) <= -33 ) goto tr87; } else if ( (*p) > -17 ) { if ( -16 <= (*p) && (*p) <= -12 ) goto tr89; } else goto tr88; } else if ( (*p) > 13 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr91; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr91; } else goto tr91; } else goto tr90; goto tr85; tr85: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st55; st55: if ( ++p == pe ) goto _test_eof55; case 55: #line 2391 "hpricot_css.c" if ( (*p) == 61 ) goto st56; goto st0; st56: if ( ++p == pe ) goto _test_eof56; case 56: switch( (*p) ) { case 32: goto tr96; case 34: goto tr97; case 39: goto tr98; case 93: goto st0; } if ( 9 <= (*p) && (*p) <= 13 ) goto tr96; goto tr95; tr95: #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st57; st57: if ( ++p == pe ) goto _test_eof57; case 57: #line 2420 "hpricot_css.c" if ( (*p) == 93 ) goto 
tr100; goto st57; tr96: #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st58; st58: if ( ++p == pe ) goto _test_eof58; case 58: #line 2436 "hpricot_css.c" switch( (*p) ) { case 32: goto st58; case 34: goto st59; case 39: goto st62; case 93: goto tr100; } if ( 9 <= (*p) && (*p) <= 13 ) goto st58; goto st57; tr97: #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st59; st59: if ( ++p == pe ) goto _test_eof59; case 59: #line 2458 "hpricot_css.c" switch( (*p) ) { case 34: goto st57; case 93: goto tr104; } goto st59; tr104: #line 1 "NONE" {te = p+1;} goto st120; st120: if ( ++p == pe ) goto _test_eof120; case 120: #line 2472 "hpricot_css.c" if ( (*p) == 34 ) goto st61; goto st60; st60: if ( ++p == pe ) goto _test_eof60; case 60: if ( (*p) == 34 ) goto st61; goto st60; st61: if ( ++p == pe ) goto _test_eof61; case 61: if ( (*p) == 93 ) goto tr100; goto tr105; tr98: #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st62; st62: if ( ++p == pe ) goto _test_eof62; case 62: #line 2502 "hpricot_css.c" switch( (*p) ) { case 39: goto st57; case 93: goto tr108; } goto st62; tr108: #line 1 "NONE" {te = p+1;} goto st121; st121: if ( ++p == pe ) goto _test_eof121; case 121: #line 2516 "hpricot_css.c" if ( (*p) == 39 ) goto st61; goto st63; st63: if ( ++p == pe ) goto _test_eof63; case 63: if ( (*p) == 39 ) goto st61; goto st63; tr86: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st64; st64: if ( ++p == pe ) goto _test_eof64; case 64: #line 2538 "hpricot_css.c" if ( (*p) == 61 ) goto st56; if ( -88 <= (*p) && (*p) <= -65 ) goto st54; goto st0; tr87: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st65; st65: if ( ++p == pe ) goto _test_eof65; case 65: #line 2555 "hpricot_css.c" if ( (*p) == 61 ) goto st56; if ( (*p) <= -65 ) goto st54; goto st0; tr88: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st66; st66: if ( ++p == pe ) goto _test_eof66; case 66: #line 2572 
"hpricot_css.c" if ( (*p) == 61 ) goto st56; if ( (*p) <= -65 ) goto st67; goto st0; tr78: #line 30 "hpricot_css.rl" { aps = p; } goto st67; st67: if ( ++p == pe ) goto _test_eof67; case 67: #line 2588 "hpricot_css.c" if ( (*p) <= -65 ) goto st54; goto st0; tr89: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st68; st68: if ( ++p == pe ) goto _test_eof68; case 68: #line 2603 "hpricot_css.c" if ( (*p) == 61 ) goto st56; if ( (*p) <= -65 ) goto st69; goto st0; tr79: #line 30 "hpricot_css.rl" { aps = p; } goto st69; st69: if ( ++p == pe ) goto _test_eof69; case 69: #line 2619 "hpricot_css.c" if ( (*p) <= -65 ) goto st67; goto st0; tr90: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st70; st70: if ( ++p == pe ) goto _test_eof70; case 70: #line 2634 "hpricot_css.c" switch( (*p) ) { case 32: goto st70; case 61: goto st71; } if ( 9 <= (*p) && (*p) <= 13 ) goto st70; goto st55; tr92: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st71; st71: if ( ++p == pe ) goto _test_eof71; case 71: #line 2653 "hpricot_css.c" switch( (*p) ) { case 32: goto tr96; case 34: goto tr97; case 39: goto tr98; case 61: goto tr115; case 93: goto st0; } if ( 9 <= (*p) && (*p) <= 13 ) goto tr96; goto tr95; tr115: #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st72; st72: if ( ++p == pe ) goto _test_eof72; case 72: #line 2676 "hpricot_css.c" switch( (*p) ) { case 32: goto tr96; case 34: goto tr97; case 39: goto tr98; case 93: goto tr100; } if ( 9 <= (*p) && (*p) <= 13 ) goto tr96; goto tr95; tr93: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st73; st73: if ( ++p == pe ) goto _test_eof73; case 73: #line 2697 "hpricot_css.c" switch( (*p) ) { case 46: goto st54; case 61: goto st56; } goto st0; tr80: #line 30 "hpricot_css.rl" { aps = p; } goto st74; st74: if ( ++p == pe ) goto _test_eof74; case 74: #line 2713 "hpricot_css.c" if ( (*p) <= -65 ) goto st69; goto st0; tr82: #line 30 "hpricot_css.rl" { aps = p; } goto st75; st75: if ( ++p == pe ) 
goto _test_eof75; case 75: #line 2727 "hpricot_css.c" if ( (*p) == 46 ) goto st54; goto st0; tr83: #line 30 "hpricot_css.rl" { aps = p; } goto st76; st76: if ( ++p == pe ) goto _test_eof76; case 76: #line 2741 "hpricot_css.c" switch( (*p) ) { case -60: goto tr86; case 32: goto tr90; case 45: goto tr91; case 61: goto tr92; case 92: goto tr93; case 95: goto tr91; case 97: goto tr116; } if ( (*p) < 9 ) { if ( (*p) < -32 ) { if ( -59 <= (*p) && (*p) <= -33 ) goto tr87; } else if ( (*p) > -17 ) { if ( -16 <= (*p) && (*p) <= -12 ) goto tr89; } else goto tr88; } else if ( (*p) > 13 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr91; } else if ( (*p) > 90 ) { if ( 98 <= (*p) && (*p) <= 122 ) goto tr91; } else goto tr91; } else goto tr90; goto tr85; tr116: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st77; st77: if ( ++p == pe ) goto _test_eof77; case 77: #line 2783 "hpricot_css.c" switch( (*p) ) { case -60: goto tr86; case 32: goto tr90; case 45: goto tr91; case 61: goto tr92; case 92: goto tr93; case 95: goto tr91; case 109: goto tr117; } if ( (*p) < 9 ) { if ( (*p) < -32 ) { if ( -59 <= (*p) && (*p) <= -33 ) goto tr87; } else if ( (*p) > -17 ) { if ( -16 <= (*p) && (*p) <= -12 ) goto tr89; } else goto tr88; } else if ( (*p) > 13 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr91; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr91; } else goto tr91; } else goto tr90; goto tr85; tr117: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st78; st78: if ( ++p == pe ) goto _test_eof78; case 78: #line 2825 "hpricot_css.c" switch( (*p) ) { case -60: goto tr86; case 32: goto tr90; case 45: goto tr91; case 61: goto tr92; case 92: goto tr93; case 95: goto tr91; case 101: goto tr118; } if ( (*p) < 9 ) { if ( (*p) < -32 ) { if ( -59 <= (*p) && (*p) <= -33 ) goto tr87; } else if ( (*p) > -17 ) { if ( -16 <= (*p) && (*p) <= -12 ) goto tr89; } else goto tr88; } else if ( (*p) > 13 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 
57 ) goto tr91; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr91; } else goto tr91; } else goto tr90; goto tr85; tr118: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st79; st79: if ( ++p == pe ) goto _test_eof79; case 79: #line 2867 "hpricot_css.c" switch( (*p) ) { case -60: goto tr86; case 32: goto tr90; case 45: goto tr91; case 61: goto tr119; case 92: goto tr93; case 95: goto tr91; } if ( (*p) < 9 ) { if ( (*p) < -32 ) { if ( -59 <= (*p) && (*p) <= -33 ) goto tr87; } else if ( (*p) > -17 ) { if ( -16 <= (*p) && (*p) <= -12 ) goto tr89; } else goto tr88; } else if ( (*p) > 13 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr91; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr91; } else goto tr91; } else goto tr90; goto tr85; tr119: #line 39 "hpricot_css.rl" { ape = p; aps2 = p; } goto st80; st80: if ( ++p == pe ) goto _test_eof80; case 80: #line 2908 "hpricot_css.c" switch( (*p) ) { case -60: goto tr120; case 32: goto tr96; case 34: goto tr97; case 39: goto tr98; case 45: goto tr124; case 61: goto tr115; case 92: goto tr125; case 93: goto st0; case 95: goto tr124; } if ( (*p) < 9 ) { if ( (*p) < -32 ) { if ( -59 <= (*p) && (*p) <= -33 ) goto tr121; } else if ( (*p) > -17 ) { if ( -16 <= (*p) && (*p) <= -12 ) goto tr123; } else goto tr122; } else if ( (*p) > 13 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr124; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr124; } else goto tr124; } else goto tr96; goto tr95; tr120: #line 30 "hpricot_css.rl" { aps = p; } #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st81; st81: if ( ++p == pe ) goto _test_eof81; case 81: #line 2957 "hpricot_css.c" if ( (*p) == 93 ) goto tr100; if ( -88 <= (*p) && (*p) <= -65 ) goto st82; goto st57; tr124: #line 30 "hpricot_css.rl" { aps = p; } #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st82; st82: if ( ++p == pe ) goto _test_eof82; case 
82: #line 2979 "hpricot_css.c" switch( (*p) ) { case -60: goto st81; case 45: goto st82; case 92: goto st86; case 93: goto tr132; case 95: goto st82; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st84; } else if ( (*p) >= -59 ) goto st83; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st82; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st82; } else goto st82; } else goto st85; goto st57; tr121: #line 30 "hpricot_css.rl" { aps = p; } #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st83; st83: if ( ++p == pe ) goto _test_eof83; case 83: #line 3021 "hpricot_css.c" if ( (*p) == 93 ) goto tr100; if ( (*p) <= -65 ) goto st82; goto st57; tr122: #line 30 "hpricot_css.rl" { aps = p; } #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st84; st84: if ( ++p == pe ) goto _test_eof84; case 84: #line 3043 "hpricot_css.c" if ( (*p) == 93 ) goto tr100; if ( (*p) <= -65 ) goto st83; goto st57; tr123: #line 30 "hpricot_css.rl" { aps = p; } #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st85; st85: if ( ++p == pe ) goto _test_eof85; case 85: #line 3065 "hpricot_css.c" if ( (*p) == 93 ) goto tr100; if ( (*p) <= -65 ) goto st84; goto st57; tr125: #line 30 "hpricot_css.rl" { aps = p; } #line 44 "hpricot_css.rl" { ape2 = p; PUSH(aps, ape); PUSH(aps2, ape2); } goto st86; st86: if ( ++p == pe ) goto _test_eof86; case 86: #line 3087 "hpricot_css.c" switch( (*p) ) { case 46: goto st82; case 93: goto tr100; } goto st57; tr147: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 81 "hpricot_css.rl" {act = 5;} goto st122; st122: if ( ++p == pe ) goto _test_eof122; case 122: #line 3107 "hpricot_css.c" switch( (*p) ) { case -60: goto st1; case 45: goto tr1; case 92: goto st5; case 95: goto tr1; case 118: goto tr201; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st3; 
} else if ( (*p) >= -59 ) goto st2; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr1; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr1; } else goto tr1; } else goto st4; goto tr149; tr201: #line 1 "NONE" {te = p+1;} #line 81 "hpricot_css.rl" {act = 5;} goto st123; st123: if ( ++p == pe ) goto _test_eof123; case 123: #line 3143 "hpricot_css.c" switch( (*p) ) { case -60: goto st1; case 45: goto tr1; case 92: goto st5; case 95: goto tr1; case 101: goto tr202; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st3; } else if ( (*p) >= -59 ) goto st2; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr1; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr1; } else goto tr1; } else goto st4; goto tr149; tr202: #line 1 "NONE" {te = p+1;} #line 81 "hpricot_css.rl" {act = 5;} goto st124; st124: if ( ++p == pe ) goto _test_eof124; case 124: #line 3179 "hpricot_css.c" switch( (*p) ) { case -60: goto st1; case 45: goto tr1; case 92: goto st5; case 95: goto tr1; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st3; } else if ( (*p) >= -59 ) goto st2; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr1; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr1; } else goto tr1; } else goto st4; goto tr149; tr148: #line 1 "NONE" {te = p+1;} #line 30 "hpricot_css.rl" { aps = p; } #line 81 "hpricot_css.rl" {act = 5;} goto st125; st125: if ( ++p == pe ) goto _test_eof125; case 125: #line 3218 "hpricot_css.c" switch( (*p) ) { case -60: goto st1; case 45: goto tr1; case 92: goto st5; case 95: goto tr1; case 100: goto tr203; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st3; } else if ( (*p) >= -59 ) goto st2; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr1; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && 
(*p) <= 122 ) goto tr1; } else goto tr1; } else goto st4; goto tr149; tr203: #line 1 "NONE" {te = p+1;} #line 81 "hpricot_css.rl" {act = 5;} goto st126; st126: if ( ++p == pe ) goto _test_eof126; case 126: #line 3254 "hpricot_css.c" switch( (*p) ) { case -60: goto st1; case 45: goto tr1; case 92: goto st5; case 95: goto tr1; } if ( (*p) < -16 ) { if ( (*p) > -33 ) { if ( -32 <= (*p) && (*p) <= -17 ) goto st3; } else if ( (*p) >= -59 ) goto st2; } else if ( (*p) > -12 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr1; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr1; } else goto tr1; } else goto st4; goto tr149; } _test_eof87: cs = 87; goto _test_eof; _test_eof1: cs = 1; goto _test_eof; _test_eof88: cs = 88; goto _test_eof; _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; _test_eof89: cs = 89; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof90: cs = 90; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof91: cs = 91; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; _test_eof92: cs = 92; goto _test_eof; _test_eof93: cs = 93; goto _test_eof; _test_eof13: cs = 13; goto _test_eof; _test_eof14: cs = 14; goto _test_eof; _test_eof94: cs = 94; goto _test_eof; _test_eof15: cs = 15; goto _test_eof; _test_eof16: cs = 16; goto _test_eof; _test_eof17: cs = 17; goto _test_eof; _test_eof18: cs = 18; goto _test_eof; _test_eof19: cs = 19; goto _test_eof; _test_eof20: cs = 20; goto _test_eof; _test_eof95: cs = 95; goto _test_eof; _test_eof21: cs = 21; goto _test_eof; _test_eof22: cs = 22; goto _test_eof; _test_eof23: cs = 23; goto _test_eof; _test_eof24: cs = 24; goto _test_eof; _test_eof25: cs = 25; goto _test_eof; _test_eof26: cs = 26; goto _test_eof; _test_eof27: cs = 27; goto 
_test_eof; _test_eof28: cs = 28; goto _test_eof; _test_eof29: cs = 29; goto _test_eof; _test_eof30: cs = 30; goto _test_eof; _test_eof31: cs = 31; goto _test_eof; _test_eof32: cs = 32; goto _test_eof; _test_eof33: cs = 33; goto _test_eof; _test_eof34: cs = 34; goto _test_eof; _test_eof35: cs = 35; goto _test_eof; _test_eof36: cs = 36; goto _test_eof; _test_eof37: cs = 37; goto _test_eof; _test_eof38: cs = 38; goto _test_eof; _test_eof39: cs = 39; goto _test_eof; _test_eof40: cs = 40; goto _test_eof; _test_eof41: cs = 41; goto _test_eof; _test_eof96: cs = 96; goto _test_eof; _test_eof97: cs = 97; goto _test_eof; _test_eof42: cs = 42; goto _test_eof; _test_eof43: cs = 43; goto _test_eof; _test_eof98: cs = 98; goto _test_eof; _test_eof99: cs = 99; goto _test_eof; _test_eof100: cs = 100; goto _test_eof; _test_eof101: cs = 101; goto _test_eof; _test_eof102: cs = 102; goto _test_eof; _test_eof103: cs = 103; goto _test_eof; _test_eof104: cs = 104; goto _test_eof; _test_eof105: cs = 105; goto _test_eof; _test_eof106: cs = 106; goto _test_eof; _test_eof107: cs = 107; goto _test_eof; _test_eof108: cs = 108; goto _test_eof; _test_eof109: cs = 109; goto _test_eof; _test_eof110: cs = 110; goto _test_eof; _test_eof44: cs = 44; goto _test_eof; _test_eof45: cs = 45; goto _test_eof; _test_eof46: cs = 46; goto _test_eof; _test_eof47: cs = 47; goto _test_eof; _test_eof48: cs = 48; goto _test_eof; _test_eof49: cs = 49; goto _test_eof; _test_eof50: cs = 50; goto _test_eof; _test_eof51: cs = 51; goto _test_eof; _test_eof111: cs = 111; goto _test_eof; _test_eof112: cs = 112; goto _test_eof; _test_eof113: cs = 113; goto _test_eof; _test_eof114: cs = 114; goto _test_eof; _test_eof115: cs = 115; goto _test_eof; _test_eof116: cs = 116; goto _test_eof; _test_eof117: cs = 117; goto _test_eof; _test_eof118: cs = 118; goto _test_eof; _test_eof119: cs = 119; goto _test_eof; _test_eof52: cs = 52; goto _test_eof; _test_eof53: cs = 53; goto _test_eof; _test_eof54: cs = 54; goto _test_eof; 
_test_eof55: cs = 55; goto _test_eof; _test_eof56: cs = 56; goto _test_eof; _test_eof57: cs = 57; goto _test_eof; _test_eof58: cs = 58; goto _test_eof; _test_eof59: cs = 59; goto _test_eof; _test_eof120: cs = 120; goto _test_eof; _test_eof60: cs = 60; goto _test_eof; _test_eof61: cs = 61; goto _test_eof; _test_eof62: cs = 62; goto _test_eof; _test_eof121: cs = 121; goto _test_eof; _test_eof63: cs = 63; goto _test_eof; _test_eof64: cs = 64; goto _test_eof; _test_eof65: cs = 65; goto _test_eof; _test_eof66: cs = 66; goto _test_eof; _test_eof67: cs = 67; goto _test_eof; _test_eof68: cs = 68; goto _test_eof; _test_eof69: cs = 69; goto _test_eof; _test_eof70: cs = 70; goto _test_eof; _test_eof71: cs = 71; goto _test_eof; _test_eof72: cs = 72; goto _test_eof; _test_eof73: cs = 73; goto _test_eof; _test_eof74: cs = 74; goto _test_eof; _test_eof75: cs = 75; goto _test_eof; _test_eof76: cs = 76; goto _test_eof; _test_eof77: cs = 77; goto _test_eof; _test_eof78: cs = 78; goto _test_eof; _test_eof79: cs = 79; goto _test_eof; _test_eof80: cs = 80; goto _test_eof; _test_eof81: cs = 81; goto _test_eof; _test_eof82: cs = 82; goto _test_eof; _test_eof83: cs = 83; goto _test_eof; _test_eof84: cs = 84; goto _test_eof; _test_eof85: cs = 85; goto _test_eof; _test_eof86: cs = 86; goto _test_eof; _test_eof122: cs = 122; goto _test_eof; _test_eof123: cs = 123; goto _test_eof; _test_eof124: cs = 124; goto _test_eof; _test_eof125: cs = 125; goto _test_eof; _test_eof126: cs = 126; goto _test_eof; _test_eof: {} if ( p == eof ) { switch ( cs ) { case 1: goto tr0; case 88: goto tr149; case 2: goto tr0; case 3: goto tr0; case 4: goto tr0; case 5: goto tr0; case 89: goto tr153; case 6: goto tr4; case 90: goto tr154; case 8: goto tr0; case 91: goto tr155; case 9: goto tr0; case 10: goto tr0; case 11: goto tr0; case 12: goto tr0; case 92: goto tr159; case 93: goto tr149; case 14: goto tr0; case 94: goto tr162; case 15: goto tr0; case 16: goto tr0; case 17: goto tr0; case 18: goto tr0; case 20: 
goto tr0; case 95: goto tr166; case 21: goto tr0; case 22: goto tr0; case 23: goto tr0; case 24: goto tr41; case 25: goto tr0; case 26: goto tr0; case 27: goto tr0; case 28: goto tr0; case 29: goto tr0; case 30: goto tr0; case 31: goto tr0; case 32: goto tr0; case 33: goto tr0; case 34: goto tr0; case 35: goto tr0; case 36: goto tr0; case 37: goto tr0; case 38: goto tr0; case 39: goto tr0; case 40: goto tr0; case 41: goto tr0; case 96: goto tr166; case 97: goto tr173; case 42: goto tr62; case 43: goto tr62; case 98: goto tr166; case 99: goto tr166; case 100: goto tr166; case 101: goto tr166; case 102: goto tr166; case 103: goto tr166; case 104: goto tr173; case 105: goto tr166; case 106: goto tr166; case 107: goto tr166; case 108: goto tr166; case 109: goto tr166; case 110: goto tr192; case 44: goto tr66; case 45: goto tr66; case 46: goto tr66; case 47: goto tr66; case 48: goto tr66; case 49: goto tr66; case 50: goto tr66; case 51: goto tr66; case 111: goto tr166; case 112: goto tr166; case 113: goto tr166; case 114: goto tr166; case 115: goto tr166; case 116: goto tr166; case 117: goto tr166; case 118: goto tr166; case 119: goto tr166; case 120: goto tr200; case 60: goto tr105; case 61: goto tr105; case 121: goto tr200; case 63: goto tr105; case 122: goto tr149; case 123: goto tr149; case 124: goto tr149; case 125: goto tr149; case 126: goto tr149; } } _out: {} } #line 116 "hpricot_css.rl" rb_gc_unregister_address(&focus); rb_gc_unregister_address(&tmpt); return focus; } hpricot-0.8.6/ext/hpricot_scan/HpricotScanService.java0000644000175000017500000025161511710073440022437 0ustar boutilboutil // line 1 "hpricot_scan.java.rl" import java.io.IOException; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyHash; import org.jruby.RubyModule; import org.jruby.RubyNumeric; import org.jruby.RubyObject; import org.jruby.RubyObjectAdapter; import org.jruby.RubyRegexp; import org.jruby.RubyString; import 
org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; import org.jruby.javasupport.JavaEmbedUtils; import org.jruby.runtime.Arity; import org.jruby.runtime.Block; import org.jruby.runtime.ObjectAllocator; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.runtime.callback.Callback; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.load.BasicLibraryService; import org.jruby.util.ByteList; public class HpricotScanService implements BasicLibraryService { public static byte[] realloc(byte[] input, int size) { byte[] newArray = new byte[size]; System.arraycopy(input, 0, newArray, 0, input.length); return newArray; } // hpricot_state public static class State { public IRubyObject doc; public IRubyObject focus; public IRubyObject last; public IRubyObject EC; public boolean xml, strict, fixup; } static boolean OPT(IRubyObject opts, String key) { Ruby runtime = opts.getRuntime(); return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue(); } // H_PROP(name, H_ELE_TAG) public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_TAG, x); return self; } public static IRubyObject hpricot_ele_clear_name(IRubyObject self) { H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_name(IRubyObject self) { return H_ELE_GET(self, H_ELE_TAG); } // H_PROP(raw, H_ELE_RAW) public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_RAW, x); return self; } public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) { H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_raw(IRubyObject self) { return H_ELE_GET(self, H_ELE_RAW); } // H_PROP(parent, H_ELE_PARENT) public static IRubyObject 
hpricot_ele_set_parent(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_PARENT, x); return self; } public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) { H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_parent(IRubyObject self) { return H_ELE_GET(self, H_ELE_PARENT); } // H_PROP(attr, H_ELE_ATTR) public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_ATTR, x); return self; } public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) { H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_attr(IRubyObject self) { return H_ELE_GET(self, H_ELE_ATTR); } // H_PROP(etag, H_ELE_ETAG) public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_ETAG, x); return self; } public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) { H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_etag(IRubyObject self) { return H_ELE_GET(self, H_ELE_ETAG); } // H_PROP(children, H_ELE_CHILDREN) public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) { H_ELE_SET(self, H_ELE_CHILDREN, x); return self; } public static IRubyObject hpricot_ele_clear_children(IRubyObject self) { H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil()); return self.getRuntime().getTrue(); } public static IRubyObject hpricot_ele_get_children(IRubyObject self) { return H_ELE_GET(self, H_ELE_CHILDREN); } // H_ATTR(target) public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) { H_ELE_GET_asHash(self, H_ELE_ATTR).fastASet(self.getRuntime().newSymbol("target"), x); return self; } public static IRubyObject hpricot_ele_get_target(IRubyObject self) { return H_ELE_GET_asHash(self, 
H_ELE_ATTR).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target")); } // H_ATTR(encoding) public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x); return self; } public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding")); } // H_ATTR(version) public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x); return self; } public static IRubyObject hpricot_ele_get_version(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version")); } // H_ATTR(standalone) public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x); return self; } public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone")); } // H_ATTR(system_id) public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) { ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x); return self; } public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id")); } // H_ATTR(public_id) public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) { 
((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x); return self; } public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) { return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id")); } public static class Scanner { public IRubyObject SET(int mark, int E, IRubyObject org) { if(mark == -1 || E == mark) { return runtime.newString(""); } else if(E > mark) { return RubyString.newString(runtime, data, mark, E-mark); } else { return org; } } public int SLIDE(int N) { if(N > ts) { return N - ts; } else { return N; } } public IRubyObject CAT(IRubyObject N, int mark, int E) { if(N.isNil()) { return SET(mark, E, N); } else { ((RubyString)N).cat(data, mark, E-mark); return N; } } public void ATTR(IRubyObject K, IRubyObject V) { if(!K.isNil()) { if(attr.isNil()) { attr = RubyHash.newHash(runtime); } ((RubyHash)attr).fastASet(K, V); } } public void TEXT_PASS() { if(!text) { if(ele_open) { ele_open = false; if(ts != -1) { mark_tag = ts; } } else { mark_tag = p; } attr = runtime.getNil(); tag = runtime.getNil(); text = true; } } public void ELE(IRubyObject N) { if(te > ts || text) { int raw = -1; int rawlen = 0; ele_open = false; text = false; if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) { raw = ts; rawlen = te - ts; } if(block.isGiven()) { IRubyObject raw_string = runtime.getNil(); if(raw != -1) { raw_string = RubyString.newString(runtime, data, raw, rawlen); } yieldTokens(N, tag, attr, runtime.getNil(), taint); } else { hpricotToken(S, N, tag, attr, raw, rawlen, taint); } } } public void EBLK(IRubyObject N, int T) { tag = CAT(tag, mark_tag, p - T + 1); ELE(N); } public void hpricotAdd(IRubyObject focus, IRubyObject ele) { IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN); if(children.isNil()) { H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1)); } 
((RubyArray)children).append(ele);
            // Link the child back to the element it was appended to.
            H_ELE_SET(ele, H_ELE_PARENT, focus);
        }

        /**
         * Working set for a single token while it is turned into a document
         * node and attached to the tree. All fields are filled in by
         * Scanner.hpricotToken(State, ...) before hpricotToken(boolean) runs.
         */
        private static class TokenInfo {
            // Token kind (compared against the x.sym_* symbols).
            public IRubyObject sym;
            // Tag name or text payload; may be rewritten during fix-up.
            public IRubyObject tag;
            // Attribute value for the node (a hash is expected in the strict doctype branch).
            public IRubyObject attr;
            // Offset of the raw token text in scanner.data, or -1 when there is none.
            public int raw;
            // Length of the raw token text.
            public int rawlen;
            // Content-model entry looked up in S.EC for this tag (nil if unknown).
            public IRubyObject ec;
            // Node built for this token.
            public IRubyObject ele;
            public Extra x;
            public Ruby runtime;
            public Scanner scanner;
            public State S;

            /**
             * Allocates an instance of klass into {@code ele}, fills the slots
             * appropriate for that class and records it as S.last.
             */
            public void H_ELE(RubyClass klass) {
                ele = klass.allocate();
                if(klass == x.cElem) {
                    H_ELE_SET(ele, H_ELE_TAG, tag);
                    H_ELE_SET(ele, H_ELE_ATTR, attr);
                    H_ELE_SET(ele, H_ELE_EC, ec);
                    if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
                        H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
                    }
                } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
                    if(klass == x.cBogusETag) {
                        // Bogus end tags keep their raw text in the attr slot.
                        H_ELE_SET(ele, H_ELE_TAG, tag);
                        if(raw != -1) {
                            H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
                        }
                    } else {
                        if(klass == x.cDocType) {
                            // NOTE(review): this writes to scanner.attr while the
                            // slot below is filled from this.attr; the two only
                            // coincide when they reference the same hash - confirm.
                            scanner.ATTR(runtime.newSymbol("target"), tag);
                        }
                        H_ELE_SET(ele, H_ELE_ATTR, attr);
                        if(klass != x.cProcIns) {
                            // For doctype / xmldecl the tag slot holds the raw text (or nil).
                            tag = runtime.getNil();
                            if(raw != -1) {
                                tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
                            }
                        }
                        H_ELE_SET(ele, H_ELE_TAG, tag);
                    }
                } else {
                    H_ELE_SET(ele, H_ELE_TAG, tag);
                }
                S.last = ele;
            }

            /**
             * Folds this token into the document tree rooted at S.doc,
             * updating S.focus and S.last as elements open and close.
             *
             * @param taint not read by this method
             */
            public void hpricotToken(boolean taint) {
                //
                // in html mode, fix up start tags incorrectly formed as empty tags
                //
                if(!S.xml) {
                    if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
                        // Look the tag up as written, then retry with its downcased form.
                        ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
                        if(ec.isNil()) {
                            tag = tag.callMethod(scanner.ctx, "downcase");
                            ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
                        }
                    }

                    // Inside a CDATA-content element, anything other than that
                    // element's own end tag is demoted to plain text.
                    if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
                       (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
                       !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
                        sym = x.sym_text;
                        tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
                    }

                    if(!ec.isNil()) {
                        if(sym == x.sym_emptytag) {
                            if(ec != x.sym_EMPTY) {
                                sym = x.sym_stag;
                            }
                        } else if(sym == x.sym_stag) {
                            if(ec == x.sym_EMPTY) {
                                sym = x.sym_emptytag;
                            }
                        }
                    }
                }

                if(sym == x.sym_emptytag || sym == x.sym_stag) {
                    // Elements are matched by the hash code of their tag name.
                    IRubyObject name = runtime.newFixnum(tag.hashCode());
                    H_ELE(x.cElem);
                    H_ELE_SET(ele, H_ELE_HASH, name);

                    if(!S.xml) {
                        // Walk from the focus up to the document looking for an
                        // ancestor that may contain this element.
                        IRubyObject match = runtime.getNil(), e = S.focus;
                        while(e != S.doc) {
                            if (ec.isNil()) {
                                // Anything can contain an unknown element
                                if(match.isNil()) {
                                    match = e;
                                }
                            } else {
                                IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
                                if(hEC instanceof RubyHash) {
                                    IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
                                    if(!has.isNil()) {
                                        if(has == runtime.getTrue()) {
                                            if(match.isNil()) {
                                                match = e;
                                            }
                                        } else if(has == x.symAllow) {
                                            match = S.focus;
                                        } else if(has == x.symDeny) {
                                            match = runtime.getNil();
                                        }
                                    }
                                } else {
                                    // Unknown elements can contain anything
                                    if(match.isNil()) {
                                        match = e;
                                    }
                                }
                            }
                            e = H_ELE_GET(e, H_ELE_PARENT);
                        }

                        // No suitable ancestor: stay where we are.
                        if(match.isNil()) {
                            match = S.focus;
                        }
                        S.focus = match;
                    }

                    scanner.hpricotAdd(S.focus, ele);

                    //
                    // in the case of a start tag that should be empty, just
                    // skip the step that focuses the element. focusing moves
                    // us deeper into the document.
                    //
                    if(sym == x.sym_stag) {
                        if(S.xml || ec != x.sym_EMPTY) {
                            S.focus = ele;
                            S.last = runtime.getNil();
                        }
                    }
                } else if(sym == x.sym_etag) {
                    IRubyObject name, match = runtime.getNil(), e = S.focus;
                    if(S.strict) {
                        // In strict mode, tags missing from S.EC are treated as "div".
                        if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
                            tag = runtime.newString("div");
                        }
                    }

                    // Find the nearest open ancestor with the same tag hash.
                    name = runtime.newFixnum(tag.hashCode());
                    while(e != S.doc) {
                        if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
                            match = e;
                            break;
                        }
                        e = H_ELE_GET(e, H_ELE_PARENT);
                    }

                    if(match.isNil()) {
                        // No matching start tag: keep the end tag as a bogus node.
                        H_ELE(x.cBogusETag);
                        scanner.hpricotAdd(S.focus, ele);
                    } else {
                        // Store the raw end tag on the matched element and pop
                        // the focus back to its parent.
                        ele = runtime.getNil();
                        if(raw != -1) {
                            ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
                        }
                        H_ELE_SET(match, H_ELE_ETAG, ele);
                        S.focus = H_ELE_GET(match, H_ELE_PARENT);
                        S.last = runtime.getNil();
                    }
                } else if(sym == x.sym_cdata) {
                    H_ELE(x.cCData);
                    scanner.hpricotAdd(S.focus, ele);
                } else if(sym == x.sym_comment) {
                    H_ELE(x.cComment);
                    scanner.hpricotAdd(S.focus, ele);
                } else if(sym == x.sym_doctype) {
                    H_ELE(x.cDocType);
                    if(S.strict) {
                        // NOTE(review): assumes attr is a hash here; a nil attr
                        // would fail this cast - confirm against the scanner.
                        RubyHash h = (RubyHash)attr;
                        h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
                        h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
                    }
                    scanner.hpricotAdd(S.focus, ele);
                } else if(sym == x.sym_procins) {
                    // Split the instruction into tag (group 1) and attr (group 2).
                    IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
                    tag = RubyRegexp.nth_match(1, match);
                    attr = RubyRegexp.nth_match(2, match);
                    H_ELE(x.cProcIns);
                    scanner.hpricotAdd(S.focus, ele);
                } else if(sym == x.sym_text) {
                    // Merge consecutive text tokens into the previous text node.
                    if(!S.last.isNil() && S.last.getType() == x.cText) {
                        ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
                    } else {
                        H_ELE(x.cText);
                        scanner.hpricotAdd(S.focus, ele);
                    }
                } else if(sym == x.sym_xmldecl) {
                    H_ELE(x.cXMLDecl);
                    scanner.hpricotAdd(S.focus, ele);
                }
            }
        }

        /**
         * Packs the token into a fresh TokenInfo (ec and ele start as nil) and
         * lets it attach itself to the tree held by S.
         */
        public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
            TokenInfo t = new TokenInfo();
            t.sym = _sym;
            t.tag = _tag;
            t.attr = _attr;
            t.raw = _raw;
            t.rawlen = _rawlen;
            t.ec = runtime.getNil();
            t.ele = runtime.getNil();
            t.x = x;
            t.runtime = runtime;
            t.scanner = this;
            t.S = S;

            t.hpricotToken(taint);
        }

        /**
         * Yields [sym, tag, attr, raw] to the scanner's block. For text tokens
         * raw is replaced by tag; when taint is set the array and its tag,
         * attr and raw members are marked tainted first.
         */
        public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
            if(sym == x.sym_text) {
                raw = tag;
            }

            IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
            if(taint) {
                ary.setTaint(true);
                tag.setTaint(true);
                attr.setTaint(true);
                raw.setTaint(true);
            }

            block.yield(ctx, ary);
        }

// line 573 "hpricot_scan.java.rl"



// line 531 "HpricotScanService.java"
        // Ragel-generated table for the hpricot_scan machine; do not edit by
        // hand - regenerate from hpricot_scan.java.rl instead.
        private static byte[] init__hpricot_scan_actions_0()
        {
            return new byte [] {
                0, 1, 1, 1, 2, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9, 1, 10, 1, 11, 1, 12, 1, 14, 1, 16, 1, 20, 1, 21, 1, 22, 1, 24, 1, 25, 1, 26, 1, 28, 1, 29, 1, 30, 1, 32, 1, 33, 1, 38, 1, 39, 1, 40, 1, 41, 1, 42, 1, 43, 1, 44, 1, 45, 1, 46, 1, 47, 1, 48, 1, 49, 1, 50, 1, 51, 2, 2, 5, 2, 2, 6, 2, 2, 11, 2, 2, 12, 2, 2, 14, 2, 4, 39, 2, 4, 40, 2, 4, 41, 2, 5, 2, 2, 6, 14, 2, 7, 6, 2, 7, 14, 2, 11, 12, 2, 13, 3, 2, 14, 6, 2, 14, 40, 2, 15, 24, 2, 15, 28, 2, 15, 32, 2, 15, 45, 2, 17, 23, 2, 18, 27, 2, 19, 31, 2, 22, 34, 2, 22, 36, 3, 2, 6, 14, 3, 2, 14, 6, 3, 6, 7, 14, 3, 6, 14, 40, 3, 7, 14, 40, 3, 11, 2, 12, 3, 14, 6, 40, 3, 14, 13, 3, 3, 22, 0, 37, 3, 22, 2, 34, 3, 22, 14, 35, 4, 2, 14, 13, 3, 4, 6, 7, 14, 40, 4, 22, 2, 14, 35, 4, 22, 6, 14, 35, 4, 22, 7, 14, 35, 4, 22, 14, 6, 35, 5, 22, 2, 6, 14, 35, 5, 22, 2, 14, 6, 35, 5, 22, 6, 7, 14, 35
            };
        }

        private static final byte _hpricot_scan_actions[] = init__hpricot_scan_actions_0();


        // Ragel-generated table; the initializer continues on the following lines.
        private static short[] init__hpricot_scan_key_offsets_0()
        {
            return new short [] {
                0, 3, 4, 5, 6, 7, 8, 9, 10, 13, 22, 37, 44, 45, 46, 47, 48, 49, 52, 57, 69, 81, 86, 93, 94, 95, 100, 101, 105, 106, 107, 121, 135, 152, 169, 186, 203, 210, 212, 214, 220, 222, 227, 232, 238, 240, 245, 251, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 282, 296, 300, 313, 326, 340,
354, 355, 366, 375, 388, 405, 423, 441, 450, 461, 480, 499, 509, 519, 533, 534, 549, 564, 566, 577, 588, 607, 626, 644, 662, 681, 700, 710, 721, 732, 743, 758, 760, 774, 775, 790, 805, 807, 818, 828, 846, 865, 884, 894, 905, 924, 943, 954, 973, 993, 1009, 1025, 1028, 1039, 1050, 1069, 1088, 1107, 1118, 1137, 1152, 1167, 1178, 1198, 1216, 1218, 1232, 1243, 1257, 1259, 1269, 1270, 1271, 1282, 1289, 1302, 1316, 1330, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1353, 1358, 1367, 1377, 1382, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1401, 1406, 1410, 1420, 1425, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1444, 1449, 1451, 1452, 1453, 1458, 1459, 1460, 1462, 1463, 1464, 1465, 1466, 1470, 1480, 1489, 1499, 1500, 1501, 1503, 1512, 1513, 1514, 1515, 1516, 1517, 1519, 1522, 1526, 1528, 1529, 1531, 1532, 1535 }; } private static final short _hpricot_scan_key_offsets[] = init__hpricot_scan_key_offsets_0(); private static char[] init__hpricot_scan_trans_keys_0() { return new char [] { 45, 68, 91, 45, 79, 67, 84, 89, 80, 69, 32, 9, 13, 32, 58, 95, 9, 13, 65, 90, 97, 122, 32, 62, 63, 91, 95, 9, 13, 45, 46, 48, 58, 65, 90, 97, 122, 32, 62, 80, 83, 91, 9, 13, 85, 66, 76, 73, 67, 32, 9, 13, 32, 34, 39, 9, 13, 9, 34, 61, 95, 32, 37, 39, 59, 63, 90, 97, 122, 9, 34, 61, 95, 32, 37, 39, 59, 63, 90, 97, 122, 32, 62, 91, 9, 13, 32, 34, 39, 62, 91, 9, 13, 34, 34, 32, 62, 91, 9, 13, 93, 32, 62, 9, 13, 39, 39, 9, 39, 61, 95, 32, 33, 35, 37, 40, 59, 63, 90, 97, 122, 9, 39, 61, 95, 32, 33, 35, 37, 40, 59, 63, 90, 97, 122, 9, 32, 33, 39, 62, 91, 95, 10, 13, 35, 37, 40, 59, 61, 90, 97, 122, 9, 32, 34, 39, 62, 91, 95, 10, 13, 33, 37, 40, 59, 61, 90, 97, 122, 9, 32, 33, 39, 62, 91, 95, 10, 13, 35, 37, 40, 59, 61, 90, 97, 122, 9, 32, 34, 39, 62, 91, 95, 10, 13, 33, 37, 40, 59, 61, 90, 97, 122, 32, 34, 39, 62, 91, 9, 13, 34, 39, 34, 39, 32, 39, 62, 91, 9, 13, 39, 93, 32, 62, 93, 9, 13, 32, 39, 62, 9, 13, 32, 34, 62, 91, 9, 13, 34, 93, 32, 34, 62, 9, 13, 32, 39, 62, 91, 9, 
13, 9, 39, 61, 95, 32, 33, 35, 37, 40, 59, 63, 90, 97, 122, 89, 83, 84, 69, 77, 67, 68, 65, 84, 65, 91, 58, 95, 65, 90, 97, 122, 32, 62, 63, 95, 9, 13, 45, 46, 48, 58, 65, 90, 97, 122, 32, 62, 9, 13, 32, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 32, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 32, 47, 61, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 32, 47, 61, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 62, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 47, 60, 62, 9, 10, 11, 12, 32, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 13, 32, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 47, 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 47, 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 47, 60, 62, 9, 10, 11, 12, 32, 34, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 34, 32, 34, 47, 61, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 32, 34, 47, 61, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 34, 62, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 47, 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 47, 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 32, 34, 39, 47, 62, 63, 95, 9, 13, 45, 
58, 65, 90, 97, 122, 34, 39, 32, 39, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 39, 32, 39, 47, 61, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 32, 39, 47, 61, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 39, 62, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 39, 47, 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 39, 47, 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 32, 34, 39, 47, 61, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 32, 34, 39, 47, 61, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 34, 39, 62, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 32, 34, 39, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 32, 34, 39, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 39, 47, 60, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 34, 39, 32, 39, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 32, 34, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 
97, 122, 34, 39, 13, 32, 39, 47, 60, 62, 9, 10, 11, 12, 34, 39, 13, 32, 34, 39, 47, 60, 62, 9, 10, 11, 12, 58, 95, 120, 65, 90, 97, 122, 32, 63, 95, 9, 13, 45, 46, 48, 58, 65, 90, 97, 122, 32, 63, 95, 109, 9, 13, 45, 46, 48, 58, 65, 90, 97, 122, 32, 63, 95, 108, 9, 13, 45, 46, 48, 58, 65, 90, 97, 122, 32, 63, 95, 9, 13, 45, 46, 48, 58, 65, 90, 97, 122, 101, 114, 115, 105, 111, 110, 32, 61, 9, 13, 32, 34, 39, 9, 13, 95, 45, 46, 48, 58, 65, 90, 97, 122, 34, 95, 45, 46, 48, 58, 65, 90, 97, 122, 32, 62, 63, 9, 13, 32, 62, 63, 101, 115, 9, 13, 62, 110, 99, 111, 100, 105, 110, 103, 32, 61, 9, 13, 32, 34, 39, 9, 13, 65, 90, 97, 122, 34, 95, 45, 46, 48, 57, 65, 90, 97, 122, 32, 62, 63, 9, 13, 32, 62, 63, 115, 9, 13, 116, 97, 110, 100, 97, 108, 111, 110, 101, 32, 61, 9, 13, 32, 34, 39, 9, 13, 110, 121, 111, 34, 32, 62, 63, 9, 13, 101, 115, 110, 121, 111, 39, 101, 115, 65, 90, 97, 122, 39, 95, 45, 46, 48, 57, 65, 90, 97, 122, 95, 45, 46, 48, 58, 65, 90, 97, 122, 39, 95, 45, 46, 48, 58, 65, 90, 97, 122, 62, 62, 10, 60, 33, 47, 58, 63, 95, 65, 90, 97, 122, 39, 93, 34, 34, 39, 34, 39, 32, 9, 13, 32, 118, 9, 13, 10, 45, 45, 10, 93, 93, 10, 62, 63, 62, 0 }; } private static final char _hpricot_scan_trans_keys[] = init__hpricot_scan_trans_keys_0(); private static byte[] init__hpricot_scan_single_lengths_0() { return new byte [] { 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 1, 1, 1, 1, 1, 1, 3, 4, 4, 3, 5, 1, 1, 3, 1, 2, 1, 1, 4, 4, 7, 7, 7, 7, 5, 2, 2, 4, 2, 3, 3, 4, 2, 3, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 2, 5, 5, 6, 6, 1, 7, 5, 5, 7, 8, 8, 5, 7, 9, 9, 6, 6, 6, 1, 7, 7, 2, 7, 7, 9, 9, 8, 8, 9, 9, 6, 7, 7, 7, 7, 2, 6, 1, 7, 7, 2, 7, 6, 8, 9, 9, 6, 7, 9, 9, 7, 9, 10, 8, 8, 3, 7, 7, 9, 9, 9, 7, 9, 7, 7, 7, 10, 8, 2, 6, 7, 6, 2, 6, 1, 1, 7, 3, 3, 4, 4, 3, 1, 1, 1, 1, 1, 1, 2, 3, 1, 2, 3, 5, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 0, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 1, 1, 3, 1, 1, 2, 1, 1, 1, 1, 0, 2, 1, 2, 1, 1, 2, 5, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 3, 1 }; } private static 
final byte _hpricot_scan_single_lengths[] = init__hpricot_scan_single_lengths_0(); private static byte[] init__hpricot_scan_range_lengths_0() { return new byte [] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 1, 0, 0, 0, 0, 0, 1, 1, 4, 4, 1, 1, 0, 0, 1, 0, 1, 0, 0, 5, 5, 5, 5, 5, 5, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 5, 1, 4, 4, 4, 4, 0, 2, 2, 4, 5, 5, 5, 2, 2, 5, 5, 2, 2, 4, 0, 4, 4, 0, 2, 2, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 4, 0, 4, 0, 4, 4, 0, 2, 2, 5, 5, 5, 2, 2, 5, 5, 2, 5, 5, 4, 4, 0, 2, 2, 5, 5, 5, 2, 5, 4, 4, 2, 5, 5, 0, 4, 2, 4, 0, 2, 0, 0, 2, 2, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 4, 4, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }; } private static final byte _hpricot_scan_range_lengths[] = init__hpricot_scan_range_lengths_0(); private static short[] init__hpricot_scan_index_offsets_0() { return new short [] { 0, 4, 6, 8, 10, 12, 14, 16, 18, 21, 28, 39, 46, 48, 50, 52, 54, 56, 59, 64, 73, 82, 87, 94, 96, 98, 103, 105, 109, 111, 113, 123, 133, 146, 159, 172, 185, 192, 195, 198, 204, 207, 212, 217, 223, 226, 231, 237, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269, 274, 284, 288, 298, 308, 319, 330, 332, 342, 350, 360, 373, 387, 401, 409, 419, 434, 449, 458, 467, 478, 480, 492, 504, 507, 517, 527, 542, 557, 571, 585, 600, 615, 624, 634, 644, 654, 666, 669, 680, 682, 694, 706, 709, 719, 728, 742, 757, 772, 781, 791, 806, 821, 831, 846, 862, 875, 888, 892, 902, 912, 927, 942, 957, 967, 982, 994, 1006, 1016, 1032, 1046, 1049, 1060, 1070, 1081, 1084, 1093, 1095, 1097, 1107, 1113, 1122, 1132, 1142, 1151, 1153, 1155, 1157, 1159, 1161, 1163, 1167, 1172, 1178, 1185, 1190, 1197, 1199, 1201, 1203, 1205, 1207, 1209, 1211, 1213, 1217, 1222, 1225, 1232, 1237, 1243, 1245, 1247, 1249, 1251, 1253, 1255, 1257, 1259, 1261, 1265, 1270, 1273, 1275, 1277, 1282, 1284, 1286, 1289, 1291, 1293, 1295, 1297, 1300, 
1307, 1313, 1320, 1322, 1324, 1327, 1335, 1337, 1339, 1341, 1343, 1345, 1348, 1351, 1355, 1358, 1360, 1363, 1365, 1369 }; } private static final short _hpricot_scan_index_offsets[] = init__hpricot_scan_index_offsets_0(); private static short[] init__hpricot_scan_indicies_0() { return new short [] { 1, 2, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 9, 0, 10, 0, 11, 11, 0, 11, 12, 12, 11, 12, 12, 0, 13, 15, 14, 16, 14, 13, 14, 14, 14, 14, 0, 17, 18, 19, 20, 21, 17, 0, 22, 0, 23, 0, 24, 0, 25, 0, 26, 0, 27, 27, 0, 27, 28, 29, 27, 0, 30, 31, 30, 30, 30, 30, 30, 30, 0, 32, 33, 32, 32, 32, 32, 32, 32, 0, 34, 18, 21, 34, 0, 34, 35, 36, 18, 21, 34, 0, 38, 37, 41, 40, 42, 18, 21, 42, 39, 43, 21, 43, 18, 43, 39, 38, 44, 41, 45, 46, 47, 46, 46, 46, 46, 46, 46, 46, 0, 48, 49, 48, 48, 48, 48, 48, 48, 48, 0, 50, 50, 48, 49, 18, 21, 48, 34, 48, 48, 48, 48, 0, 50, 50, 35, 51, 18, 21, 48, 34, 48, 48, 48, 48, 0, 52, 52, 54, 55, 56, 57, 54, 53, 54, 54, 54, 54, 44, 58, 58, 61, 62, 63, 64, 60, 59, 60, 60, 60, 60, 45, 59, 61, 65, 63, 64, 59, 45, 67, 68, 66, 70, 71, 69, 72, 41, 63, 64, 72, 45, 73, 74, 64, 75, 76, 43, 75, 21, 74, 41, 63, 74, 45, 77, 41, 78, 79, 77, 40, 73, 80, 79, 80, 41, 78, 80, 40, 81, 38, 56, 57, 81, 44, 60, 82, 60, 60, 60, 60, 60, 60, 60, 45, 83, 0, 84, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 94, 94, 94, 0, 95, 97, 96, 96, 95, 96, 96, 96, 96, 0, 98, 99, 98, 0, 100, 102, 103, 101, 101, 100, 101, 101, 101, 0, 104, 106, 107, 105, 105, 104, 105, 105, 105, 0, 108, 110, 111, 112, 109, 109, 108, 109, 109, 109, 39, 113, 115, 116, 117, 114, 114, 113, 114, 114, 114, 39, 118, 39, 120, 120, 122, 123, 124, 39, 117, 120, 121, 119, 126, 126, 128, 39, 129, 126, 127, 125, 130, 115, 117, 114, 114, 130, 114, 114, 114, 39, 126, 126, 132, 39, 133, 131, 131, 126, 127, 131, 131, 131, 125, 134, 134, 137, 39, 138, 139, 136, 136, 134, 135, 136, 136, 136, 125, 140, 140, 132, 39, 142, 133, 131, 131, 140, 141, 131, 131, 131, 125, 126, 126, 128, 39, 129, 126, 127, 125, 143, 
143, 145, 146, 147, 39, 129, 143, 144, 119, 148, 148, 122, 123, 124, 39, 117, 150, 150, 148, 149, 150, 150, 150, 119, 143, 143, 145, 146, 151, 39, 133, 150, 150, 143, 144, 150, 150, 150, 119, 153, 153, 155, 156, 157, 158, 153, 154, 152, 160, 160, 162, 163, 164, 165, 160, 161, 159, 166, 167, 169, 170, 168, 168, 166, 168, 168, 168, 164, 167, 164, 171, 167, 173, 174, 175, 172, 172, 171, 172, 172, 172, 164, 176, 167, 169, 177, 170, 168, 168, 176, 168, 168, 168, 164, 167, 178, 164, 179, 179, 181, 182, 183, 164, 170, 179, 180, 152, 184, 184, 181, 182, 183, 164, 170, 184, 185, 152, 184, 184, 181, 182, 183, 164, 170, 186, 186, 184, 185, 186, 186, 186, 152, 187, 187, 189, 190, 191, 164, 192, 186, 186, 187, 188, 186, 186, 186, 152, 153, 153, 155, 191, 157, 193, 186, 186, 153, 154, 186, 186, 186, 152, 160, 160, 162, 195, 164, 192, 194, 194, 160, 161, 194, 194, 194, 159, 196, 196, 162, 199, 164, 200, 201, 198, 198, 196, 197, 198, 198, 198, 159, 202, 202, 162, 195, 164, 204, 192, 194, 194, 202, 203, 194, 194, 194, 159, 160, 160, 162, 163, 164, 165, 160, 161, 159, 187, 187, 189, 190, 156, 164, 165, 187, 188, 152, 206, 206, 208, 209, 210, 211, 212, 206, 207, 205, 214, 214, 216, 217, 218, 219, 220, 214, 215, 213, 221, 222, 223, 225, 226, 224, 224, 221, 224, 224, 224, 219, 222, 223, 219, 228, 167, 230, 231, 229, 229, 228, 229, 229, 229, 227, 167, 227, 232, 167, 234, 235, 236, 233, 233, 232, 233, 233, 233, 227, 237, 167, 230, 238, 231, 229, 229, 237, 229, 229, 229, 227, 167, 239, 227, 241, 241, 243, 244, 245, 227, 231, 241, 242, 240, 247, 247, 162, 249, 227, 250, 247, 248, 246, 247, 247, 162, 252, 227, 253, 251, 251, 247, 248, 251, 251, 251, 246, 254, 254, 162, 257, 227, 258, 259, 256, 256, 254, 255, 256, 256, 256, 246, 260, 260, 162, 252, 227, 262, 253, 251, 251, 260, 261, 251, 251, 251, 246, 247, 247, 162, 249, 227, 250, 247, 248, 246, 263, 263, 265, 266, 267, 227, 250, 263, 264, 240, 268, 268, 243, 244, 245, 227, 231, 270, 270, 268, 269, 270, 270, 270, 240, 263, 263, 265, 266, 
271, 227, 253, 270, 270, 263, 264, 270, 270, 270, 240, 206, 206, 272, 209, 210, 211, 212, 206, 207, 205, 214, 214, 216, 217, 274, 219, 275, 273, 273, 214, 215, 273, 273, 273, 213, 276, 276, 216, 217, 279, 219, 280, 281, 278, 278, 276, 277, 278, 278, 278, 213, 282, 222, 223, 225, 283, 226, 224, 224, 282, 224, 224, 224, 219, 284, 222, 223, 286, 287, 288, 285, 285, 284, 285, 285, 285, 219, 222, 223, 289, 219, 290, 290, 292, 293, 294, 219, 226, 290, 291, 205, 295, 295, 292, 293, 294, 219, 226, 295, 296, 205, 295, 295, 292, 293, 294, 219, 226, 297, 297, 295, 296, 297, 297, 297, 205, 298, 298, 300, 301, 302, 219, 275, 297, 297, 298, 299, 297, 297, 297, 205, 206, 206, 272, 209, 302, 211, 303, 297, 297, 206, 207, 297, 297, 297, 205, 214, 214, 216, 217, 218, 219, 220, 214, 215, 213, 206, 206, 208, 209, 302, 211, 303, 297, 297, 206, 207, 297, 297, 297, 205, 304, 305, 306, 308, 309, 307, 307, 304, 307, 307, 307, 211, 304, 310, 306, 308, 309, 307, 307, 304, 307, 307, 307, 211, 298, 298, 300, 301, 210, 219, 220, 298, 299, 205, 311, 311, 216, 217, 274, 219, 313, 275, 273, 273, 311, 312, 273, 273, 273, 213, 314, 314, 155, 271, 316, 317, 270, 270, 314, 315, 270, 270, 270, 240, 305, 306, 211, 318, 319, 321, 322, 320, 320, 318, 320, 320, 320, 316, 268, 268, 243, 244, 245, 227, 231, 268, 269, 240, 323, 319, 325, 326, 324, 324, 323, 324, 324, 324, 157, 310, 306, 211, 314, 314, 155, 267, 316, 327, 314, 315, 240, 319, 157, 319, 316, 148, 148, 122, 123, 124, 39, 117, 148, 149, 119, 328, 328, 329, 328, 328, 0, 330, 331, 331, 330, 331, 331, 331, 331, 0, 330, 331, 331, 332, 330, 331, 331, 331, 331, 0, 330, 331, 331, 333, 330, 331, 331, 331, 331, 0, 334, 331, 331, 334, 331, 331, 331, 331, 0, 336, 335, 337, 335, 338, 335, 339, 335, 340, 335, 341, 335, 341, 342, 341, 335, 342, 343, 344, 342, 335, 345, 345, 345, 345, 345, 335, 346, 347, 347, 347, 347, 347, 335, 348, 349, 350, 348, 335, 348, 349, 350, 351, 352, 348, 335, 349, 335, 353, 335, 354, 335, 355, 335, 356, 335, 357, 335, 358, 335, 359, 
335, 359, 360, 359, 335, 360, 361, 362, 360, 335, 363, 363, 335, 364, 365, 365, 365, 365, 365, 335, 366, 349, 350, 366, 335, 366, 349, 350, 352, 366, 335, 367, 335, 368, 335, 369, 335, 370, 335, 371, 335, 372, 335, 373, 335, 374, 335, 375, 335, 375, 376, 375, 335, 376, 377, 378, 376, 335, 379, 380, 335, 381, 335, 382, 335, 383, 349, 350, 383, 335, 384, 335, 381, 335, 385, 386, 335, 387, 335, 382, 335, 388, 335, 387, 335, 389, 389, 335, 364, 390, 390, 390, 390, 390, 335, 391, 391, 391, 391, 391, 335, 346, 392, 392, 392, 392, 392, 335, 394, 393, 396, 395, 398, 399, 397, 401, 402, 403, 404, 403, 403, 403, 400, 41, 45, 43, 21, 41, 40, 167, 164, 167, 227, 222, 223, 219, 330, 330, 406, 334, 407, 334, 406, 409, 410, 408, 412, 411, 414, 415, 413, 417, 416, 419, 420, 421, 418, 420, 422, 0 }; } private static final short _hpricot_scan_indicies[] = init__hpricot_scan_indicies_0(); private static short[] init__hpricot_scan_trans_targs_0() { return new short [] { 198, 1, 2, 53, 198, 3, 4, 5, 6, 7, 8, 9, 10, 11, 10, 198, 26, 11, 198, 12, 48, 26, 13, 14, 15, 16, 17, 18, 19, 30, 20, 21, 20, 21, 22, 23, 28, 24, 25, 198, 24, 25, 25, 27, 29, 29, 31, 32, 31, 32, 33, 34, 35, 36, 47, 32, 200, 40, 35, 36, 47, 37, 34, 200, 40, 46, 38, 39, 43, 38, 39, 43, 39, 41, 42, 41, 201, 43, 202, 44, 45, 39, 32, 49, 50, 51, 52, 21, 54, 55, 56, 57, 58, 198, 60, 61, 60, 198, 61, 198, 63, 62, 66, 198, 63, 64, 66, 198, 65, 64, 66, 67, 198, 65, 64, 66, 67, 198, 198, 68, 138, 74, 136, 137, 73, 68, 69, 70, 73, 198, 69, 71, 73, 198, 65, 72, 71, 73, 74, 198, 65, 72, 74, 75, 76, 77, 135, 73, 75, 76, 71, 73, 78, 79, 89, 70, 92, 80, 203, 78, 79, 89, 70, 92, 80, 203, 79, 69, 81, 83, 203, 82, 81, 83, 84, 203, 82, 84, 203, 85, 93, 133, 134, 92, 86, 87, 90, 86, 87, 88, 94, 92, 203, 203, 90, 92, 82, 91, 90, 92, 93, 203, 82, 91, 93, 95, 96, 113, 105, 89, 123, 97, 205, 95, 96, 113, 105, 89, 123, 97, 205, 96, 98, 79, 116, 117, 205, 99, 98, 100, 102, 204, 101, 100, 102, 103, 204, 101, 103, 204, 104, 132, 109, 130, 131, 
108, 104, 98, 105, 108, 204, 106, 108, 204, 101, 107, 106, 108, 109, 204, 101, 107, 109, 110, 111, 112, 129, 108, 110, 111, 106, 108, 105, 114, 123, 205, 115, 128, 114, 123, 127, 205, 115, 118, 115, 116, 117, 118, 205, 205, 119, 127, 125, 126, 123, 120, 121, 114, 120, 121, 122, 124, 123, 205, 96, 98, 79, 116, 117, 205, 98, 115, 128, 127, 98, 105, 99, 204, 98, 69, 100, 102, 204, 79, 81, 83, 203, 204, 140, 141, 206, 140, 142, 143, 207, 198, 145, 146, 147, 148, 149, 150, 151, 152, 194, 153, 154, 153, 155, 198, 156, 157, 170, 158, 159, 160, 161, 162, 163, 164, 165, 166, 192, 167, 168, 167, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 187, 182, 185, 183, 184, 184, 186, 188, 190, 189, 191, 193, 193, 195, 195, 208, 208, 210, 210, 198, 198, 199, 198, 0, 59, 62, 139, 198, 198, 144, 208, 208, 209, 208, 196, 210, 210, 211, 210, 197, 212, 212, 212, 213, 212 }; } private static final short _hpricot_scan_trans_targs[] = init__hpricot_scan_trans_targs_0(); private static short[] init__hpricot_scan_trans_actions_0() { return new short [] { 73, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, 0, 1, 5, 0, 92, 5, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 83, 0, 19, 0, 0, 0, 3, 86, 75, 0, 21, 0, 0, 3, 0, 3, 83, 0, 19, 0, 19, 3, 3, 3, 172, 188, 3, 0, 0, 0, 0, 113, 146, 0, 21, 3, 86, 86, 0, 21, 21, 0, 21, 0, 0, 146, 0, 146, 0, 0, 3, 113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 1, 5, 0, 98, 0, 55, 5, 0, 5, 95, 0, 116, 0, 53, 11, 0, 110, 11, 168, 0, 180, 23, 0, 122, 57, 3, 3, 3, 0, 0, 89, 0, 9, 9, 104, 164, 0, 180, 119, 176, 107, 107, 0, 160, 11, 201, 9, 9, 0, 80, 80, 0, 0, 152, 3, 3, 196, 156, 3, 80, 80, 77, 152, 3, 226, 0, 9, 9, 7, 104, 0, 211, 0, 7, 180, 23, 192, 11, 0, 110, 11, 216, 0, 0, 149, 3, 3, 7, 0, 89, 3, 3, 196, 80, 80, 7, 0, 156, 221, 232, 180, 119, 107, 107, 0, 160, 11, 238, 9, 9, 0, 3, 80, 80, 101, 77, 152, 3, 226, 0, 9, 9, 7, 7, 104, 0, 211, 0, 7, 7, 180, 23, 192, 0, 0, 180, 23, 192, 11, 0, 110, 11, 216, 0, 0, 149, 3, 3, 3, 0, 7, 89, 0, 9, 9, 104, 211, 180, 119, 221, 107, 107, 0, 
160, 11, 238, 9, 9, 0, 80, 80, 0, 7, 152, 3, 3, 196, 156, 77, 180, 119, 221, 107, 107, 0, 160, 11, 238, 0, 0, 11, 0, 110, 11, 216, 149, 3, 3, 7, 7, 89, 3, 3, 196, 80, 80, 7, 7, 156, 232, 3, 77, 77, 196, 89, 206, 101, 9, 9, 0, 80, 80, 3, 232, 3, 77, 196, 89, 206, 3, 196, 89, 206, 226, 25, 25, 0, 0, 0, 0, 31, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 13, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 17, 0, 0, 3, 3, 0, 0, 3, 0, 3, 0, 37, 137, 43, 140, 63, 134, 184, 69, 0, 0, 1, 0, 65, 67, 0, 33, 125, 31, 35, 0, 39, 128, 31, 41, 0, 45, 131, 143, 0, 47 }; } private static final short _hpricot_scan_trans_actions[] = init__hpricot_scan_trans_actions_0(); private static short[] init__hpricot_scan_to_state_actions_0() { return new short [] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 0, 27, 0 }; } private static final short _hpricot_scan_to_state_actions[] = init__hpricot_scan_to_state_actions_0(); private static short[] init__hpricot_scan_from_state_actions_0() { return new short [] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 0, 29, 0, 29, 0 }; } private static final short _hpricot_scan_from_state_actions[] = init__hpricot_scan_from_state_actions_0(); private static short[] init__hpricot_scan_eof_trans_0() { return new short [] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40, 40, 40, 40, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 1, 1, 1, 1, 1, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 394, 396, 0, 401, 406, 406, 406, 40, 40, 40, 407, 407, 0, 412, 0, 417, 0, 423 }; } private static final short _hpricot_scan_eof_trans[] = init__hpricot_scan_eof_trans_0(); static final int hpricot_scan_start = 198; static final int hpricot_scan_error = -1; static final int hpricot_scan_en_html_comment = 208; static final int hpricot_scan_en_html_cdata = 210; static final int hpricot_scan_en_html_procins = 212; static final int hpricot_scan_en_main = 198; // line 576 "hpricot_scan.java.rl" public final static int BUFSIZE = 16384; private int cs, act, have = 0, nread = 0, curline = 1; private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0; private byte[] data; private State S = null; private IRubyObject port, opts, attr, tag, akey, 
aval, bufsize; private int mark_tag = -1, mark_akey = -1, mark_aval = -1; private boolean done = false, ele_open = false, taint = false, io = false, text = false; private int buffer_size = 0; private Extra x; private IRubyObject self; private Ruby runtime; private ThreadContext ctx; private Block block; private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins; private RaiseException newRaiseException(RubyClass exceptionClass, String message) { return new RaiseException(runtime, exceptionClass, message, true); } public Scanner(IRubyObject self, IRubyObject[] args, Block block) { this.self = self; this.runtime = self.getRuntime(); this.ctx = runtime.getCurrentContext(); this.block = block; attr = runtime.getNil(); tag = runtime.getNil(); akey = runtime.getNil(); aval = runtime.getNil(); bufsize = runtime.getNil(); this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct(); this.xmldecl = x.sym_xmldecl; this.doctype = x.sym_doctype; this.stag = x.sym_stag; this.etag = x.sym_etag; this.emptytag = x.sym_emptytag; this.comment = x.sym_comment; this.cdata = x.sym_cdata; this.procins = x.sym_procins; port = args[0]; if(args.length == 2) { opts = args[1]; } else { opts = runtime.getNil(); } taint = port.isTaint(); io = port.respondsTo("read"); if(!io) { if(port.respondsTo("to_str")) { port = port.callMethod(ctx, "to_str"); port = port.convertToString(); } else { throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)"); } } if(!(opts instanceof RubyHash)) { opts = runtime.getNil(); } if(!block.isGiven()) { S = new State(); S.doc = x.cDoc.allocate(); S.focus = S.doc; S.last = runtime.getNil(); S.xml = OPT(opts, "xml"); S.strict = OPT(opts, "xhtml_strict"); S.fixup = OPT(opts, "fixup_tags"); if(S.strict) { S.fixup = true; } S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts); S.EC = x.mHpricot.getConstant("ElementContent"); } buffer_size = BUFSIZE; 
if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) { bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size"); if(!bufsize.isNil()) { buffer_size = RubyNumeric.fix2int(bufsize); } } if(io) { buf = 0; data = new byte[buffer_size]; } } private int len, space; // hpricot_scan public IRubyObject scan() { // line 1216 "HpricotScanService.java" { cs = hpricot_scan_start; ts = -1; te = -1; act = 0; } // line 679 "hpricot_scan.java.rl" while(!done) { p = pe = len = buf; space = buffer_size - have; if(io) { if(space == 0) { /* We've used up the entire buffer storing an already-parsed token * prefix that must be preserved. Likely caused by super-long attributes. * Increase buffer size and continue */ buffer_size += BUFSIZE; data = realloc(data, buffer_size); space = buffer_size - have; } p = have; IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space)); ByteList bl = str.convertToString().getByteList(); len = bl.realSize; System.arraycopy(bl.bytes, bl.begin, data, p, len); } else { ByteList bl = port.convertToString().getByteList(); data = bl.bytes; buf = bl.begin; p = bl.begin; len = bl.realSize + 1; if(p + len >= data.length) { data = new byte[len]; System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize); p = 0; buf = 0; } done = true; eof = p + len; } nread += len; /* If this is the last buffer, tack on an EOF. 
*/ if(io && len < space) { data[p + len++] = 0; eof = p + len; done = true; } pe = p + len; // line 1272 "HpricotScanService.java" { int _klen; int _trans = 0; int _acts; int _nacts; int _keys; int _goto_targ = 0; _goto: while (true) { switch ( _goto_targ ) { case 0: if ( p == pe ) { _goto_targ = 4; continue _goto; } case 1: _acts = _hpricot_scan_from_state_actions[cs]; _nacts = (int) _hpricot_scan_actions[_acts++]; while ( _nacts-- > 0 ) { switch ( _hpricot_scan_actions[_acts++] ) { case 21: // line 1 "NONE" {ts = p;} break; // line 1297 "HpricotScanService.java" } } _match: do { _keys = _hpricot_scan_key_offsets[cs]; _trans = _hpricot_scan_index_offsets[cs]; _klen = _hpricot_scan_single_lengths[cs]; if ( _klen > 0 ) { int _lower = _keys; int _mid; int _upper = _keys + _klen - 1; while (true) { if ( _upper < _lower ) break; _mid = _lower + ((_upper-_lower) >> 1); if ( data[p] < _hpricot_scan_trans_keys[_mid] ) _upper = _mid - 1; else if ( data[p] > _hpricot_scan_trans_keys[_mid] ) _lower = _mid + 1; else { _trans += (_mid - _keys); break _match; } } _keys += _klen; _trans += _klen; } _klen = _hpricot_scan_range_lengths[cs]; if ( _klen > 0 ) { int _lower = _keys; int _mid; int _upper = _keys + (_klen<<1) - 2; while (true) { if ( _upper < _lower ) break; _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( data[p] < _hpricot_scan_trans_keys[_mid] ) _upper = _mid - 2; else if ( data[p] > _hpricot_scan_trans_keys[_mid+1] ) _lower = _mid + 2; else { _trans += ((_mid - _keys)>>1); break _match; } } _trans += _klen; } } while (false); _trans = _hpricot_scan_indicies[_trans]; case 3: cs = _hpricot_scan_trans_targs[_trans]; if ( _hpricot_scan_trans_actions[_trans] != 0 ) { _acts = _hpricot_scan_trans_actions[_trans]; _nacts = (int) _hpricot_scan_actions[_acts++]; while ( _nacts-- > 0 ) { switch ( _hpricot_scan_actions[_acts++] ) { case 0: // line 526 "hpricot_scan.java.rl" { if(text) { tag = CAT(tag, mark_tag, p); ELE(x.sym_text); text = false; } attr = runtime.getNil(); 
tag = runtime.getNil(); mark_tag = -1; ele_open = true; } break; case 1: // line 538 "hpricot_scan.java.rl" { mark_tag = p; } break; case 2: // line 539 "hpricot_scan.java.rl" { mark_aval = p; } break; case 3: // line 540 "hpricot_scan.java.rl" { mark_akey = p; } break; case 4: // line 541 "hpricot_scan.java.rl" { tag = SET(mark_tag, p, tag); } break; case 5: // line 543 "hpricot_scan.java.rl" { aval = SET(mark_aval, p, aval); } break; case 6: // line 544 "hpricot_scan.java.rl" { if(data[p-1] == '"' || data[p-1] == '\'') { aval = SET(mark_aval, p-1, aval); } else { aval = SET(mark_aval, p, aval); } } break; case 7: // line 551 "hpricot_scan.java.rl" { akey = SET(mark_akey, p, akey); } break; case 8: // line 552 "hpricot_scan.java.rl" { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); } break; case 9: // line 553 "hpricot_scan.java.rl" { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); } break; case 10: // line 554 "hpricot_scan.java.rl" { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); } break; case 11: // line 555 "hpricot_scan.java.rl" { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); } break; case 12: // line 556 "hpricot_scan.java.rl" { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); } break; case 13: // line 558 "hpricot_scan.java.rl" { akey = runtime.getNil(); aval = runtime.getNil(); mark_akey = -1; mark_aval = -1; } break; case 14: // line 565 "hpricot_scan.java.rl" { if(!S.xml && !akey.isNil()) { akey = akey.callMethod(runtime.getCurrentContext(), "downcase"); } ATTR(akey, aval); } break; case 15: // line 9 "hpricot_common.rl" {curline += 1;} break; case 16: // line 46 "hpricot_common.rl" { TEXT_PASS(); } break; case 17: // line 50 "hpricot_common.rl" { EBLK(comment, 3); {cs = 198; _goto_targ = 2; if (true) continue _goto;} } break; case 18: // line 55 "hpricot_common.rl" { EBLK(cdata, 3); {cs = 198; _goto_targ = 2; if 
(true) continue _goto;} } break; case 19: // line 60 "hpricot_common.rl" { EBLK(procins, 2); {cs = 198; _goto_targ = 2; if (true) continue _goto;} } break; case 22: // line 1 "NONE" {te = p+1;} break; case 23: // line 50 "hpricot_common.rl" {te = p+1;} break; case 24: // line 51 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} break; case 25: // line 51 "hpricot_common.rl" {te = p;p--;{ TEXT_PASS(); }} break; case 26: // line 51 "hpricot_common.rl" {{p = ((te))-1;}{ TEXT_PASS(); }} break; case 27: // line 55 "hpricot_common.rl" {te = p+1;} break; case 28: // line 56 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} break; case 29: // line 56 "hpricot_common.rl" {te = p;p--;{ TEXT_PASS(); }} break; case 30: // line 56 "hpricot_common.rl" {{p = ((te))-1;}{ TEXT_PASS(); }} break; case 31: // line 60 "hpricot_common.rl" {te = p+1;} break; case 32: // line 61 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} break; case 33: // line 61 "hpricot_common.rl" {te = p;p--;{ TEXT_PASS(); }} break; case 34: // line 66 "hpricot_common.rl" {act = 8;} break; case 35: // line 68 "hpricot_common.rl" {act = 10;} break; case 36: // line 70 "hpricot_common.rl" {act = 12;} break; case 37: // line 73 "hpricot_common.rl" {act = 15;} break; case 38: // line 65 "hpricot_common.rl" {te = p+1;{ ELE(xmldecl); }} break; case 39: // line 66 "hpricot_common.rl" {te = p+1;{ ELE(doctype); }} break; case 40: // line 68 "hpricot_common.rl" {te = p+1;{ ELE(stag); }} break; case 41: // line 69 "hpricot_common.rl" {te = p+1;{ ELE(etag); }} break; case 42: // line 70 "hpricot_common.rl" {te = p+1;{ ELE(emptytag); }} break; case 43: // line 71 "hpricot_common.rl" {te = p+1;{ {cs = 208; _goto_targ = 2; if (true) continue _goto;} }} break; case 44: // line 72 "hpricot_common.rl" {te = p+1;{ {cs = 210; _goto_targ = 2; if (true) continue _goto;} }} break; case 45: // line 73 "hpricot_common.rl" {te = p+1;{ TEXT_PASS(); }} break; case 46: // line 66 "hpricot_common.rl" {te = p;p--;{ ELE(doctype); }} break; case 47: 
// line 67 "hpricot_common.rl" {te = p;p--;{ {cs = 212; _goto_targ = 2; if (true) continue _goto;} }} break; case 48: // line 73 "hpricot_common.rl" {te = p;p--;{ TEXT_PASS(); }} break; case 49: // line 67 "hpricot_common.rl" {{p = ((te))-1;}{ {cs = 212; _goto_targ = 2; if (true) continue _goto;} }} break; case 50: // line 73 "hpricot_common.rl" {{p = ((te))-1;}{ TEXT_PASS(); }} break; case 51: // line 1 "NONE" { switch( act ) { case 8: {{p = ((te))-1;} ELE(doctype); } break; case 10: {{p = ((te))-1;} ELE(stag); } break; case 12: {{p = ((te))-1;} ELE(emptytag); } break; case 15: {{p = ((te))-1;} TEXT_PASS(); } break; } } break; // line 1601 "HpricotScanService.java" } } } case 2: _acts = _hpricot_scan_to_state_actions[cs]; _nacts = (int) _hpricot_scan_actions[_acts++]; while ( _nacts-- > 0 ) { switch ( _hpricot_scan_actions[_acts++] ) { case 20: // line 1 "NONE" {ts = -1;} break; // line 1615 "HpricotScanService.java" } } if ( ++p != pe ) { _goto_targ = 1; continue _goto; } case 4: if ( p == eof ) { if ( _hpricot_scan_eof_trans[cs] > 0 ) { _trans = _hpricot_scan_eof_trans[cs] - 1; _goto_targ = 3; continue _goto; } } case 5: } break; } } // line 726 "hpricot_scan.java.rl" if(cs == hpricot_scan_error) { if(!tag.isNil()) { throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY); } else { throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY); } } if(done && ele_open) { ele_open = false; if(ts > 0) { mark_tag = ts; ts = 0; text = true; } } if(ts == -1) { have = 0; if(mark_tag != -1 && text) { if(done) { if(mark_tag < p - 1) { tag = CAT(tag, mark_tag, p-1); ELE(x.sym_text); } } else { tag = CAT(tag, mark_tag, p); } } if(io) { mark_tag = 0; } else { mark_tag = ((RubyString)port).getByteList().begin; } } else if(io) { have = pe - ts; System.arraycopy(data, ts, data, buf, have); mark_tag = SLIDE(mark_tag); mark_akey = 
SLIDE(mark_akey); mark_aval = SLIDE(mark_aval); te -= ts; ts = 0; } } if(S != null) { return S.doc; } return runtime.getNil(); } } public static class HpricotModule { // hpricot_scan @JRubyMethod(module = true, optional = 1, required = 1, frame = true) public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) { return new Scanner(self, args, block).scan(); } // hpricot_css @JRubyMethod(module = true) public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) { return new HpricotCss(self, mod, str, node).scan(); } } public static class CData { @JRubyMethod public static IRubyObject content(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod(name = "content=") public static IRubyObject content_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_name(self, value); } } public static class Comment { @JRubyMethod public static IRubyObject content(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod(name = "content=") public static IRubyObject content_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_name(self, value); } } public static class DocType { @JRubyMethod public static IRubyObject raw_string(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_name(self); } @JRubyMethod public static IRubyObject target(IRubyObject self) { return hpricot_ele_get_target(self); } @JRubyMethod(name = "target=") public static IRubyObject target_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_target(self, value); } @JRubyMethod public static IRubyObject public_id(IRubyObject self) { return hpricot_ele_get_public_id(self); } @JRubyMethod(name = "public_id=") public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_public_id(self, value); } @JRubyMethod public static IRubyObject system_id(IRubyObject 
self) { return hpricot_ele_get_system_id(self); } @JRubyMethod(name = "system_id=") public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_system_id(self, value); } } public static class Elem { @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_raw(self); } } public static class BogusETag { @JRubyMethod public static IRubyObject raw_string(IRubyObject self) { return hpricot_ele_get_attr(self); } @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_attr(self); } } public static class Text { @JRubyMethod public static IRubyObject raw_string(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_name(self); } @JRubyMethod public static IRubyObject content(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod(name = "content=") public static IRubyObject content_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_name(self, value); } } public static class XMLDecl { @JRubyMethod public static IRubyObject raw_string(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod public static IRubyObject clear_raw(IRubyObject self) { return hpricot_ele_clear_name(self); } @JRubyMethod public static IRubyObject encoding(IRubyObject self) { return hpricot_ele_get_encoding(self); } @JRubyMethod(name = "encoding=") public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_encoding(self, value); } @JRubyMethod public static IRubyObject standalone(IRubyObject self) { return hpricot_ele_get_standalone(self); } @JRubyMethod(name = "standalone=") public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_standalone(self, value); } @JRubyMethod public static IRubyObject version(IRubyObject self) { return hpricot_ele_get_version(self); } 
@JRubyMethod(name = "version=") public static IRubyObject version_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_version(self, value); } } public static class ProcIns { @JRubyMethod public static IRubyObject target(IRubyObject self) { return hpricot_ele_get_name(self); } @JRubyMethod(name = "target=") public static IRubyObject target_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_name(self, value); } @JRubyMethod public static IRubyObject content(IRubyObject self) { return hpricot_ele_get_attr(self); } @JRubyMethod(name = "content=") public static IRubyObject content_set(IRubyObject self, IRubyObject value) { return hpricot_ele_set_attr(self, value); } } public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!"; public final static int H_ELE_TAG = 0; public final static int H_ELE_PARENT = 1; public final static int H_ELE_ATTR = 2; public final static int H_ELE_ETAG = 3; public final static int H_ELE_RAW = 4; public final static int H_ELE_EC = 5; public final static int H_ELE_HASH = 6; public final static int H_ELE_CHILDREN = 7; public static IRubyObject H_ELE_GET(IRubyObject recv, int n) { return ((IRubyObject[])recv.dataGetStruct())[n]; } public static RubyHash H_ELE_GET_asHash(IRubyObject recv, int n) { IRubyObject obj = ((IRubyObject[])recv.dataGetStruct())[n]; if(obj.isNil()) { obj = RubyHash.newHash(recv.getRuntime()); ((IRubyObject[])recv.dataGetStruct())[n] = obj; } return (RubyHash)obj; } public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) { ((IRubyObject[])recv.dataGetStruct())[n] = value; return value; } private static class RefCallback implements Callback { private final int n; public RefCallback(int n) { this.n = n; } public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) { return H_ELE_GET(recv, n); } public Arity getArity() { return 
Arity.NO_ARGUMENTS; } } private static class SetCallback implements Callback { private final int n; public SetCallback(int n) { this.n = n; } public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) { return H_ELE_SET(recv, n, args[0]); } public Arity getArity() { return Arity.ONE_ARGUMENT; } } private final static Callback[] ref_func = new Callback[]{ new RefCallback(0), new RefCallback(1), new RefCallback(2), new RefCallback(3), new RefCallback(4), new RefCallback(5), new RefCallback(6), new RefCallback(7), new RefCallback(8), new RefCallback(9)}; private final static Callback[] set_func = new Callback[]{ new SetCallback(0), new SetCallback(1), new SetCallback(2), new SetCallback(3), new SetCallback(4), new SetCallback(5), new SetCallback(6), new SetCallback(7), new SetCallback(8), new SetCallback(9)}; public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() { // alloc_hpricot_struct public IRubyObject allocate(Ruby runtime, RubyClass klass) { RubyClass kurrent = klass; Object sz = kurrent.fastGetInternalVariable("__size__"); while(sz == null && kurrent != null) { kurrent = kurrent.getSuperClass(); sz = kurrent.fastGetInternalVariable("__size__"); } int size = RubyNumeric.fix2int((RubyObject)sz); RubyObject obj = new RubyObject(runtime, klass); IRubyObject[] all = new IRubyObject[size]; java.util.Arrays.fill(all, runtime.getNil()); obj.dataWrapStruct(all); return obj; } }; public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) { RubyClass klass = RubyClass.newClass(runtime, runtime.getObject()); klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length)); klass.setAllocator(alloc_hpricot_struct); for(int i = 0; i < members.length; i++) { String id = members[i].toString(); klass.defineMethod(id, ref_func[i]); klass.defineMethod(id + "=", set_func[i]); } return klass; } public boolean basicLoad(final Ruby runtime) throws IOException { Init_hpricot_scan(runtime); return true; } 
public static class Extra { IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_allowed, sym_children, sym_comment, sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno, sym_text, sym_EMPTY, sym_CDATA; public RubyModule mHpricot; public RubyClass structElem; public RubyClass structAttr; public RubyClass structBasic; public RubyClass cDoc; public RubyClass cCData; public RubyClass cComment; public RubyClass cDocType; public RubyClass cElem; public RubyClass cBogusETag; public RubyClass cText; public RubyClass cXMLDecl; public RubyClass cProcIns; public RubyClass rb_eHpricotParseError; public IRubyObject reProcInsParse; public Extra(Ruby runtime) { symAllow = runtime.newSymbol("allow"); symDeny = runtime.newSymbol("deny"); sym_xmldecl = runtime.newSymbol("xmldecl"); sym_doctype = runtime.newSymbol("doctype"); sym_procins = runtime.newSymbol("procins"); sym_stag = runtime.newSymbol("stag"); sym_etag = runtime.newSymbol("etag"); sym_emptytag = runtime.newSymbol("emptytag"); sym_allowed = runtime.newSymbol("allowed"); sym_children = runtime.newSymbol("children"); sym_comment = runtime.newSymbol("comment"); sym_cdata = runtime.newSymbol("cdata"); sym_name = runtime.newSymbol("name"); sym_parent = runtime.newSymbol("parent"); sym_raw_attributes = runtime.newSymbol("raw_attributes"); sym_raw_string = runtime.newSymbol("raw_string"); sym_tagno = runtime.newSymbol("tagno"); sym_text = runtime.newSymbol("text"); sym_EMPTY = runtime.newSymbol("EMPTY"); sym_CDATA = runtime.newSymbol("CDATA"); } } public static void Init_hpricot_scan(Ruby runtime) { Extra x = new Extra(runtime); x.mHpricot = runtime.defineModule("Hpricot"); x.mHpricot.dataWrapStruct(x); x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")}); x.mHpricot.defineAnnotatedMethods(HpricotModule.class); x.rb_eHpricotParseError = 
x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator()); x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children}); x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes}); x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent}); x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator()); x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator()); x.cCData.defineAnnotatedMethods(CData.class); x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator()); x.cComment.defineAnnotatedMethods(Comment.class); x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator()); x.cDocType.defineAnnotatedMethods(DocType.class); x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator()); x.cElem.defineAnnotatedMethods(Elem.class); x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator()); x.cBogusETag.defineAnnotatedMethods(BogusETag.class); x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator()); x.cText.defineAnnotatedMethods(Text.class); x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator()); x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class); x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator()); x.cProcIns.defineAnnotatedMethods(ProcIns.class); x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m"); x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse); } } hpricot-0.8.6/ext/hpricot_scan/HpricotCss.java0000644000175000017500000010426711710073440020762 0ustar boutilboutil // 
line 1 "hpricot_css.java.rl" import java.io.IOException; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyHash; import org.jruby.RubyModule; import org.jruby.RubyNumeric; import org.jruby.RubyObject; import org.jruby.RubyObjectAdapter; import org.jruby.RubyRegexp; import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; import org.jruby.javasupport.JavaEmbedUtils; import org.jruby.runtime.Arity; import org.jruby.runtime.Block; import org.jruby.runtime.ObjectAllocator; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.runtime.callback.Callback; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.load.BasicLibraryService; import org.jruby.util.ByteList; public class HpricotCss { public void FILTER(String id) { IRubyObject[] args = new IRubyObject[fargs]; System.arraycopy(fvals, 0, args, 0, fargs); mod.callMethod(ctx, id, args); tmpt.rb_clear(); fargs = 1; } public void FILTERAUTO() { try { FILTER(new String(data, ts, te - ts, "ISO-8859-1")); } catch(java.io.UnsupportedEncodingException e) {} } public void PUSH(int aps, int ape) { RubyString str = RubyString.newString(runtime, data, aps, ape-aps); fvals[fargs++] = str; tmpt.append(str); } private IRubyObject self, mod, str, node; private int cs, act, eof, p, pe, ts, te, aps, ape, aps2, ape2; private byte[] data; private int fargs = 1; private IRubyObject[] fvals = new IRubyObject[6]; private RubyArray focus; private RubyArray tmpt; private Ruby runtime; private ThreadContext ctx; public HpricotCss(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) { this.self = self; this.mod = mod; this.str = str; this.node = node; this.runtime = self.getRuntime(); this.ctx = runtime.getCurrentContext(); this.focus = RubyArray.newArray(runtime, node); this.tmpt = runtime.newArray(); fvals[0] = focus; if(!(str instanceof RubyString)) { throw 
runtime.newArgumentError("bad CSS selector, String only please."); } ByteList bl = ((RubyString)str).getByteList(); data = bl.bytes; p = bl.begin; pe = p + bl.realSize; eof = pe; } // line 86 "HpricotCss.java" private static byte[] init__hpricot_css_actions_0() { return new byte [] { 0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 6, 1, 7, 1, 15, 1, 19, 1, 22, 1, 24, 1, 28, 1, 29, 1, 30, 1, 31, 1, 32, 1, 33, 1, 34, 1, 35, 2, 0, 3, 2, 1, 14, 2, 1, 16, 2, 1, 17, 2, 1, 18, 2, 1, 20, 2, 1, 21, 2, 1, 23, 2, 1, 25, 2, 1, 26, 2, 1, 27, 2, 4, 5, 2, 7, 8, 2, 7, 9, 2, 7, 10, 2, 7, 11, 2, 7, 12, 2, 7, 13, 3, 0, 1, 16, 3, 0, 1, 18, 3, 7, 0, 8, 3, 7, 0, 9, 3, 7, 0, 10, 3, 7, 0, 13, 3, 7, 1, 13 }; } private static final byte _hpricot_css_actions[] = init__hpricot_css_actions_0(); private static short[] init__hpricot_css_key_offsets_0() { return new short [] { 0, 0, 4, 20, 21, 23, 25, 27, 29, 30, 32, 34, 36, 38, 54, 55, 57, 59, 61, 63, 85, 89, 92, 95, 98, 99, 100, 101, 103, 107, 111, 114, 115, 116, 118, 119, 120, 122, 123, 125, 127, 129, 131, 137, 142, 153, 161, 165, 169, 173, 176, 180, 184, 201, 221, 222, 228, 229, 235, 237, 238, 239, 241, 242, 246, 253, 259, 261, 264, 267, 270, 272, 275, 277, 278, 299, 320, 341, 361, 384, 401, 403, 406, 409, 412, 415, 417, 419, 449, 453, 456, 472, 477, 495, 511, 527, 544, 563, 580, 598, 616, 634, 652, 670, 688, 705, 723, 741, 759, 777, 795, 812, 830, 849, 867, 885, 904, 922, 940, 958, 975, 976, 977, 994, 1011, 1027, 1044 }; } private static final short _hpricot_css_key_offsets[] = init__hpricot_css_key_offsets_0(); private static char[] init__hpricot_css_trans_keys_0() { return new char [] { 32, 44, 9, 13, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 46, 168, 191, 128, 191, 128, 191, 128, 191, 46, 168, 191, 128, 191, 128, 191, 128, 191, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 46, 168, 191, 128, 191, 128, 191, 128, 191, 45, 92, 95, 101, 102, 103, 108, 110, 111, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 
239, 240, 244, 34, 39, 40, 41, 34, 40, 41, 34, 40, 41, 34, 40, 41, 41, 41, 41, 34, 40, 34, 39, 40, 41, 34, 39, 40, 41, 39, 40, 41, 41, 41, 39, 40, 41, 41, 40, 41, 46, 168, 191, 128, 191, 128, 191, 128, 191, 34, 39, 40, 41, 48, 57, 34, 40, 41, 48, 57, 34, 39, 40, 41, 43, 45, 101, 110, 111, 48, 57, 34, 40, 41, 43, 45, 110, 48, 57, 34, 40, 41, 118, 34, 40, 41, 101, 34, 40, 41, 110, 34, 40, 41, 34, 40, 41, 100, 34, 40, 41, 100, 45, 92, 95, 110, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 32, 45, 61, 92, 95, 196, 9, 13, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 61, 32, 34, 39, 93, 9, 13, 93, 32, 34, 39, 93, 9, 13, 34, 93, 34, 93, 39, 93, 39, 32, 61, 9, 13, 32, 34, 39, 61, 93, 9, 13, 32, 34, 39, 93, 9, 13, 46, 61, 61, 168, 191, 61, 128, 191, 61, 128, 191, 128, 191, 61, 128, 191, 128, 191, 46, 32, 45, 61, 92, 95, 97, 196, 9, 13, 48, 57, 65, 90, 98, 122, 197, 223, 224, 239, 240, 244, 32, 45, 61, 92, 95, 109, 196, 9, 13, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 32, 45, 61, 92, 95, 101, 196, 9, 13, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 32, 45, 61, 92, 95, 196, 9, 13, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 32, 34, 39, 45, 61, 92, 93, 95, 196, 9, 13, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 45, 92, 93, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 46, 93, 93, 168, 191, 93, 128, 191, 93, 128, 191, 93, 128, 191, 168, 191, 128, 191, 32, 35, 43, 44, 45, 46, 58, 62, 91, 92, 95, 101, 110, 111, 126, 196, 9, 13, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 32, 44, 9, 13, 32, 9, 13, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 43, 45, 110, 48, 57, 43, 45, 92, 95, 110, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 
40, 45, 92, 95, 113, 118, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 101, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 110, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 105, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 114, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 115, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 116, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 99, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 104, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 105, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 108, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 100, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 116, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 97, 116, 196, 48, 57, 65, 90, 98, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 116, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 104, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 100, 110, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 100, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 108, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 121, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 40, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 34, 39, 45, 92, 95, 118, 196, 48, 57, 65, 90, 97, 122, 197, 
223, 224, 239, 240, 244, 45, 92, 95, 101, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 45, 92, 95, 100, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 45, 92, 95, 196, 48, 57, 65, 90, 97, 122, 197, 223, 224, 239, 240, 244, 0 }; } private static final char _hpricot_css_trans_keys[] = init__hpricot_css_trans_keys_0(); private static byte[] init__hpricot_css_single_lengths_0() { return new byte [] { 0, 2, 4, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 10, 4, 3, 3, 1, 1, 1, 1, 2, 4, 4, 1, 1, 1, 2, 1, 1, 2, 1, 0, 0, 0, 0, 4, 3, 9, 6, 4, 4, 4, 3, 4, 4, 5, 6, 1, 4, 1, 4, 2, 1, 1, 2, 1, 2, 5, 4, 2, 1, 1, 1, 0, 1, 0, 1, 7, 7, 7, 6, 9, 5, 2, 1, 1, 1, 1, 0, 0, 16, 2, 1, 4, 3, 6, 4, 4, 5, 7, 5, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 5, 6, 7, 6, 6, 7, 6, 6, 6, 5, 1, 1, 5, 5, 4, 5, 4 }; } private static final byte _hpricot_css_single_lengths[] = init__hpricot_css_single_lengths_0(); private static byte[] init__hpricot_css_range_lengths_0() { return new byte [] { 0, 1, 6, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 6, 0, 1, 1, 1, 1, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 6, 7, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 7, 7, 7, 7, 7, 6, 0, 1, 1, 1, 1, 1, 1, 7, 1, 1, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 6, 6, 6, 6, 6 }; } private static final byte _hpricot_css_range_lengths[] = init__hpricot_css_range_lengths_0(); private static short[] init__hpricot_css_index_offsets_0() { return new short [] { 0, 0, 4, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 46, 48, 50, 52, 54, 56, 73, 78, 82, 86, 89, 91, 93, 95, 98, 103, 108, 111, 113, 115, 118, 120, 122, 125, 127, 129, 131, 133, 135, 141, 146, 157, 165, 170, 175, 180, 184, 189, 194, 206, 220, 222, 228, 230, 236, 239, 241, 243, 246, 248, 252, 259, 265, 268, 271, 274, 277, 279, 282, 284, 286, 301, 316, 331, 345, 362, 374, 
377, 380, 383, 386, 389, 391, 393, 417, 421, 424, 435, 440, 453, 464, 475, 487, 501, 513, 526, 539, 552, 565, 578, 591, 603, 616, 629, 642, 655, 668, 680, 693, 707, 720, 733, 747, 760, 773, 786, 798, 800, 802, 814, 826, 837, 849 }; } private static final short _hpricot_css_index_offsets[] = init__hpricot_css_index_offsets_0(); private static byte[] init__hpricot_css_trans_targs_0() { return new byte [] { 1, 89, 1, 87, 90, 3, 90, 4, 90, 90, 90, 5, 6, 7, 0, 90, 87, 90, 87, 90, 87, 5, 87, 6, 87, 93, 87, 93, 87, 93, 87, 10, 87, 11, 87, 94, 14, 94, 15, 94, 94, 94, 16, 17, 18, 0, 94, 87, 94, 87, 94, 87, 16, 87, 17, 87, 95, 37, 95, 96, 100, 111, 112, 113, 115, 38, 95, 95, 95, 39, 40, 41, 0, 22, 28, 34, 87, 21, 87, 87, 87, 21, 24, 25, 87, 23, 24, 87, 23, 87, 87, 87, 26, 27, 26, 24, 25, 87, 30, 21, 31, 87, 29, 30, 21, 87, 87, 29, 24, 87, 30, 87, 32, 33, 32, 24, 31, 87, 87, 35, 36, 35, 34, 87, 87, 95, 87, 95, 87, 95, 87, 39, 87, 40, 87, 22, 28, 34, 87, 43, 21, 87, 87, 87, 43, 21, 22, 28, 34, 87, 45, 45, 46, 45, 50, 45, 21, 87, 87, 87, 45, 45, 45, 45, 21, 87, 87, 87, 47, 21, 87, 87, 87, 48, 21, 87, 87, 87, 49, 21, 87, 87, 87, 21, 87, 87, 87, 51, 21, 87, 87, 87, 49, 21, 53, 73, 53, 74, 85, 53, 53, 53, 70, 72, 86, 0, 63, 53, 64, 66, 53, 67, 63, 53, 53, 53, 68, 69, 71, 54, 55, 0, 57, 58, 61, 0, 57, 56, 87, 56, 57, 58, 61, 87, 57, 56, 56, 120, 58, 60, 59, 87, 87, 56, 121, 61, 60, 62, 63, 64, 63, 54, 57, 58, 61, 65, 0, 57, 56, 57, 58, 61, 87, 57, 56, 53, 55, 0, 55, 53, 0, 55, 53, 0, 55, 70, 0, 53, 0, 55, 72, 0, 70, 0, 53, 0, 63, 53, 64, 66, 53, 75, 67, 63, 53, 53, 53, 68, 69, 71, 54, 63, 53, 64, 66, 53, 76, 67, 63, 53, 53, 53, 68, 69, 71, 54, 63, 53, 64, 66, 53, 77, 67, 63, 53, 53, 53, 68, 69, 71, 54, 63, 53, 78, 66, 53, 67, 63, 53, 53, 53, 68, 69, 71, 54, 57, 58, 61, 79, 65, 80, 0, 79, 81, 57, 79, 79, 79, 82, 83, 84, 56, 79, 80, 87, 79, 81, 79, 79, 79, 82, 83, 84, 56, 79, 87, 56, 87, 79, 56, 87, 79, 56, 87, 82, 56, 87, 83, 56, 53, 0, 72, 0, 88, 2, 91, 89, 92, 13, 19, 87, 52, 8, 
93, 122, 92, 125, 87, 9, 88, 92, 93, 93, 10, 11, 12, 0, 1, 89, 1, 87, 89, 89, 87, 90, 3, 90, 4, 90, 90, 90, 5, 6, 7, 87, 91, 91, 91, 91, 87, 91, 92, 8, 93, 92, 9, 92, 93, 93, 10, 11, 12, 87, 93, 8, 93, 9, 93, 93, 93, 10, 11, 12, 87, 94, 14, 94, 15, 94, 94, 94, 16, 17, 18, 87, 20, 95, 37, 95, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 97, 98, 38, 95, 95, 95, 39, 40, 41, 87, 42, 95, 37, 95, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 99, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 97, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 101, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 102, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 103, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 104, 38, 95, 95, 95, 39, 40, 41, 87, 42, 105, 37, 95, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 106, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 107, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 108, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 109, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 110, 38, 95, 95, 95, 39, 40, 41, 87, 44, 95, 37, 95, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 97, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 102, 97, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 114, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 104, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 116, 117, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 97, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 118, 38, 95, 95, 95, 39, 40, 41, 87, 20, 95, 37, 95, 119, 38, 95, 95, 95, 39, 40, 41, 87, 20, 105, 37, 95, 38, 95, 95, 95, 39, 40, 41, 87, 60, 59, 60, 62, 93, 8, 93, 123, 9, 93, 93, 93, 10, 11, 12, 87, 93, 8, 93, 124, 9, 93, 93, 93, 10, 11, 12, 87, 93, 8, 93, 9, 93, 93, 93, 10, 11, 12, 87, 93, 8, 93, 126, 9, 93, 93, 93, 10, 11, 12, 87, 93, 8, 93, 9, 93, 93, 93, 10, 11, 12, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 
87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 0 }; } private static final byte _hpricot_css_trans_targs[] = init__hpricot_css_trans_targs_0(); private static byte[] init__hpricot_css_trans_actions_0() { return new byte [] { 0, 0, 0, 33, 99, 1, 99, 1, 99, 99, 99, 1, 1, 1, 0, 73, 35, 73, 35, 73, 35, 0, 35, 0, 35, 79, 35, 79, 35, 79, 35, 0, 35, 0, 35, 103, 1, 103, 1, 103, 103, 103, 1, 1, 1, 0, 76, 35, 76, 35, 76, 35, 0, 35, 0, 35, 111, 1, 111, 111, 111, 111, 111, 111, 111, 1, 111, 111, 111, 1, 1, 1, 0, 1, 1, 1, 95, 1, 35, 35, 49, 0, 0, 0, 35, 0, 0, 35, 0, 49, 35, 35, 0, 0, 0, 0, 0, 35, 0, 0, 0, 49, 0, 0, 0, 35, 49, 0, 0, 35, 0, 35, 0, 0, 0, 0, 0, 35, 35, 0, 0, 0, 0, 49, 35, 88, 35, 88, 35, 88, 35, 0, 35, 0, 35, 1, 1, 1, 95, 1, 1, 29, 29, 46, 0, 0, 1, 1, 1, 91, 1, 1, 1, 1, 1, 1, 1, 27, 27, 43, 0, 0, 0, 0, 0, 27, 27, 49, 0, 0, 27, 27, 49, 0, 0, 27, 27, 49, 0, 0, 27, 27, 43, 0, 27, 27, 49, 0, 0, 27, 27, 49, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 7, 7, 7, 0, 7, 7, 13, 0, 0, 0, 0, 13, 0, 0, 0, 11, 0, 0, 0, 13, 25, 0, 11, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 13, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 37, 7, 37, 0, 37, 37, 7, 37, 37, 37, 37, 37, 37, 7, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 13, 0, 0, 13, 0, 0, 13, 0, 0, 13, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 107, 0, 0, 15, 0, 1, 107, 107, 107, 107, 15, 1, 11, 107, 107, 107, 1, 1, 1, 0, 0, 0, 0, 23, 0, 0, 21, 73, 0, 73, 0, 73, 73, 73, 0, 0, 0, 52, 0, 0, 0, 0, 19, 3, 79, 0, 79, 79, 0, 79, 79, 79, 0, 0, 0, 58, 79, 0, 79, 0, 79, 79, 79, 0, 0, 0, 58, 76, 0, 76, 0, 76, 76, 76, 0, 0, 0, 55, 3, 88, 0, 88, 0, 88, 
88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 85, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 115, 3, 115, 3, 115, 115, 115, 3, 3, 3, 64, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 85, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 85, 0, 88, 88, 88, 0, 0, 0, 67, 3, 115, 3, 115, 3, 115, 115, 115, 3, 3, 3, 64, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 82, 0, 88, 88, 88, 0, 0, 0, 67, 3, 115, 3, 115, 3, 115, 115, 115, 3, 3, 3, 61, 3, 88, 0, 88, 85, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 85, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 85, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 85, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 88, 0, 88, 88, 88, 0, 0, 0, 67, 3, 88, 0, 88, 0, 88, 88, 88, 0, 0, 0, 67, 0, 0, 0, 0, 79, 0, 79, 79, 0, 79, 79, 79, 0, 0, 0, 58, 79, 0, 79, 79, 0, 79, 79, 79, 0, 0, 0, 58, 79, 0, 79, 0, 79, 79, 79, 0, 0, 0, 58, 79, 0, 79, 79, 0, 79, 79, 79, 0, 0, 0, 58, 79, 0, 79, 0, 79, 79, 79, 0, 0, 0, 58, 33, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 31, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 29, 29, 27, 27, 27, 27, 27, 27, 27, 27, 25, 25, 25, 23, 21, 52, 19, 58, 58, 55, 67, 67, 64, 67, 67, 67, 67, 67, 67, 64, 67, 67, 67, 67, 67, 61, 67, 67, 67, 67, 67, 67, 67, 67, 67, 17, 17, 58, 58, 58, 58, 58, 0 }; } private static final byte _hpricot_css_trans_actions[] = init__hpricot_css_trans_actions_0(); private static byte[] init__hpricot_css_to_state_actions_0() { return new byte [] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } private static final byte _hpricot_css_to_state_actions[] = init__hpricot_css_to_state_actions_0(); private static byte[] init__hpricot_css_from_state_actions_0() { return new byte [] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } private static final byte _hpricot_css_from_state_actions[] = init__hpricot_css_from_state_actions_0(); private static short[] init__hpricot_css_eof_trans_0() { return new short [] { 0, 861, 0, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 0, 898, 898, 898, 898, 898, 0, 877, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 900, 900, 908, 908, 908, 908, 908, 908, 908, 908, 0, 0, 0, 0, 0, 0, 0, 911, 911, 0, 911, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 912, 913, 914, 915, 950, 950, 918, 943, 943, 928, 943, 943, 943, 943, 943, 943, 928, 943, 943, 943, 943, 943, 934, 943, 943, 943, 943, 943, 943, 943, 943, 943, 945, 945, 950, 950, 950, 950, 950 }; } private static final short _hpricot_css_eof_trans[] = init__hpricot_css_eof_trans_0(); static final int hpricot_css_start = 87; static final int hpricot_css_error = 0; static final int hpricot_css_en_main = 87; // line 147 "hpricot_css.java.rl" public IRubyObject scan() { // line 533 "HpricotCss.java" { cs = hpricot_css_start; ts = -1; te = -1; act = 0; } // line 151 "hpricot_css.java.rl" 
// line 543 "HpricotCss.java" { int _klen; int _trans = 0; int _acts; int _nacts; int _keys; int _goto_targ = 0; _goto: while (true) { switch ( _goto_targ ) { case 0: if ( p == pe ) { _goto_targ = 4; continue _goto; } if ( cs == 0 ) { _goto_targ = 5; continue _goto; } case 1: _acts = _hpricot_css_from_state_actions[cs]; _nacts = (int) _hpricot_css_actions[_acts++]; while ( _nacts-- > 0 ) { switch ( _hpricot_css_actions[_acts++] ) { case 6: // line 1 "NONE" {ts = p;} break; // line 572 "HpricotCss.java" } } _match: do { _keys = _hpricot_css_key_offsets[cs]; _trans = _hpricot_css_index_offsets[cs]; _klen = _hpricot_css_single_lengths[cs]; if ( _klen > 0 ) { int _lower = _keys; int _mid; int _upper = _keys + _klen - 1; while (true) { if ( _upper < _lower ) break; _mid = _lower + ((_upper-_lower) >> 1); if ( data[p] < _hpricot_css_trans_keys[_mid] ) _upper = _mid - 1; else if ( data[p] > _hpricot_css_trans_keys[_mid] ) _lower = _mid + 1; else { _trans += (_mid - _keys); break _match; } } _keys += _klen; _trans += _klen; } _klen = _hpricot_css_range_lengths[cs]; if ( _klen > 0 ) { int _lower = _keys; int _mid; int _upper = _keys + (_klen<<1) - 2; while (true) { if ( _upper < _lower ) break; _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( data[p] < _hpricot_css_trans_keys[_mid] ) _upper = _mid - 2; else if ( data[p] > _hpricot_css_trans_keys[_mid+1] ) _lower = _mid + 2; else { _trans += ((_mid - _keys)>>1); break _match; } } _trans += _klen; } } while (false); case 3: cs = _hpricot_css_trans_targs[_trans]; if ( _hpricot_css_trans_actions[_trans] != 0 ) { _acts = _hpricot_css_trans_actions[_trans]; _nacts = (int) _hpricot_css_actions[_acts++]; while ( _nacts-- > 0 ) { switch ( _hpricot_css_actions[_acts++] ) { case 0: // line 85 "hpricot_css.java.rl" { aps = p; } break; case 1: // line 89 "hpricot_css.java.rl" { ape = p; PUSH(aps, ape); } break; case 2: // line 94 "hpricot_css.java.rl" { ape = p; aps2 = p; } break; case 3: // line 99 "hpricot_css.java.rl" { ape2 = p; 
PUSH(aps, ape); PUSH(aps2, ape2); } break; case 7: // line 1 "NONE" {te = p+1;} break; case 8: // line 132 "hpricot_css.java.rl" {act = 1;} break; case 9: // line 133 "hpricot_css.java.rl" {act = 2;} break; case 10: // line 136 "hpricot_css.java.rl" {act = 5;} break; case 11: // line 138 "hpricot_css.java.rl" {act = 7;} break; case 12: // line 139 "hpricot_css.java.rl" {act = 8;} break; case 13: // line 140 "hpricot_css.java.rl" {act = 9;} break; case 14: // line 134 "hpricot_css.java.rl" {te = p+1;{ FILTER("NAME"); }} break; case 15: // line 135 "hpricot_css.java.rl" {te = p+1;{ FILTER("ATTR"); }} break; case 16: // line 138 "hpricot_css.java.rl" {te = p+1;{ FILTER("CHILD"); }} break; case 17: // line 139 "hpricot_css.java.rl" {te = p+1;{ FILTER("POS"); }} break; case 18: // line 140 "hpricot_css.java.rl" {te = p+1;{ FILTER("PSUEDO"); }} break; case 19: // line 142 "hpricot_css.java.rl" {te = p+1;{ FILTERAUTO(); }} break; case 20: // line 132 "hpricot_css.java.rl" {te = p;p--;{ FILTER("ID"); }} break; case 21: // line 133 "hpricot_css.java.rl" {te = p;p--;{ FILTER("CLASS"); }} break; case 22: // line 135 "hpricot_css.java.rl" {te = p;p--;{ FILTER("ATTR"); }} break; case 23: // line 136 "hpricot_css.java.rl" {te = p;p--;{ FILTER("TAG"); }} break; case 24: // line 137 "hpricot_css.java.rl" {te = p;p--;{ FILTER("MOD"); }} break; case 25: // line 138 "hpricot_css.java.rl" {te = p;p--;{ FILTER("CHILD"); }} break; case 26: // line 139 "hpricot_css.java.rl" {te = p;p--;{ FILTER("POS"); }} break; case 27: // line 140 "hpricot_css.java.rl" {te = p;p--;{ FILTER("PSUEDO"); }} break; case 28: // line 141 "hpricot_css.java.rl" {te = p;p--;{ focus = RubyArray.newArray(runtime, node); }} break; case 29: // line 143 "hpricot_css.java.rl" {te = p;p--;} break; case 30: // line 135 "hpricot_css.java.rl" {{p = ((te))-1;}{ FILTER("ATTR"); }} break; case 31: // line 138 "hpricot_css.java.rl" {{p = ((te))-1;}{ FILTER("CHILD"); }} break; case 32: // line 139 "hpricot_css.java.rl" {{p = 
((te))-1;}{ FILTER("POS"); }} break; case 33: // line 140 "hpricot_css.java.rl" {{p = ((te))-1;}{ FILTER("PSUEDO"); }} break; case 34: // line 143 "hpricot_css.java.rl" {{p = ((te))-1;}} break; case 35: // line 1 "NONE" { switch( act ) { case 0: {{cs = 0; _goto_targ = 2; if (true) continue _goto;}} break; case 1: {{p = ((te))-1;} FILTER("ID"); } break; case 2: {{p = ((te))-1;} FILTER("CLASS"); } break; case 5: {{p = ((te))-1;} FILTER("TAG"); } break; case 7: {{p = ((te))-1;} FILTER("CHILD"); } break; case 8: {{p = ((te))-1;} FILTER("POS"); } break; case 9: {{p = ((te))-1;} FILTER("PSUEDO"); } break; } } break; // line 802 "HpricotCss.java" } } } case 2: _acts = _hpricot_css_to_state_actions[cs]; _nacts = (int) _hpricot_css_actions[_acts++]; while ( _nacts-- > 0 ) { switch ( _hpricot_css_actions[_acts++] ) { case 4: // line 1 "NONE" {ts = -1;} break; case 5: // line 1 "NONE" {act = 0;} break; // line 820 "HpricotCss.java" } } if ( cs == 0 ) { _goto_targ = 5; continue _goto; } if ( ++p != pe ) { _goto_targ = 1; continue _goto; } case 4: if ( p == eof ) { if ( _hpricot_css_eof_trans[cs] > 0 ) { _trans = _hpricot_css_eof_trans[cs] - 1; _goto_targ = 3; continue _goto; } } case 5: } break; } } // line 152 "hpricot_css.java.rl" return focus; } } hpricot-0.8.6/ext/hpricot_scan/hpricot_scan.h0000644000175000017500000000361011710073440020651 0ustar boutilboutil/* * hpricot_scan.h * * $Author: why $ * $Date: 2006-05-08 22:03:50 -0600 (Mon, 08 May 2006) $ * * Copyright (C) 2006 why the lucky stiff * You can redistribute it and/or modify it under the same terms as Ruby. 
*/ #ifndef hpricot_scan_h #define hpricot_scan_h #include #if defined(_WIN32) #include #endif /* * Memory Allocation */ #if defined(HAVE_ALLOCA_H) && !defined(__GNUC__) #include #endif #ifndef NULL # define NULL (void *)0 #endif #define BUFSIZE 16384 #define S_ALLOC_N(type,n) (type*)malloc(sizeof(type)*(n)) #define S_ALLOC(type) (type*)malloc(sizeof(type)) #define S_REALLOC_N(var,type,n) (var)=(type*)realloc((char*)(var),sizeof(type)*(n)) #define S_FREE(n) free(n); n = NULL; #define S_ALLOCA_N(type,n) (type*)alloca(sizeof(type)*(n)) #define S_MEMZERO(p,type,n) memset((p), 0, sizeof(type)*(n)) #define S_MEMCPY(p1,p2,type,n) memcpy((p1), (p2), sizeof(type)*(n)) #define S_MEMMOVE(p1,p2,type,n) memmove((p1), (p2), sizeof(type)*(n)) #define S_MEMCMP(p1,p2,type,n) memcmp((p1), (p2), sizeof(type)*(n)) typedef struct { void *name; void *attributes; } hpricot_element; typedef void (*hpricot_element_cb)(void *data, hpricot_element *token); typedef struct hpricot_scan { int lineno; int cs; size_t nread; size_t mark; void *data; hpricot_element_cb xmldecl; hpricot_element_cb doctype; hpricot_element_cb xmlprocins; hpricot_element_cb starttag; hpricot_element_cb endtag; hpricot_element_cb emptytag; hpricot_element_cb comment; hpricot_element_cb cdata; } http_scan; // int hpricot_scan_init(hpricot_scan *scan); // int hpricot_scan_finish(hpricot_scan *scan); // size_t hpricot_scan_execute(hpricot_scan *scan, const char *data, size_t len, size_t off); // int hpricot_scan_has_error(hpricot_scan *scan); // int hpricot_scan_is_finished(hpricot_scan *scan); // // #define hpricot_scan_nread(scan) (scan)->nread #endif hpricot-0.8.6/lib/0000755000175000017500000000000011710073440013316 5ustar boutilboutilhpricot-0.8.6/lib/hpricot.rb0000644000175000017500000000207111710073440015313 0ustar boutilboutil# == About hpricot.rb # # All of Hpricot's various part are loaded when you use require 'hpricot'. 
# # * hpricot_scan: the scanner (a C extension for Ruby) which turns an HTML stream into tokens. # * hpricot/parse.rb: uses the scanner to sort through tokens and give you back a complete document object. # * hpricot/tag.rb: sets up objects for the various types of elements in an HTML document. # * hpricot/modules.rb: categorizes the various elements using mixins. # * hpricot/traverse.rb: methods for searching documents. # * hpricot/elements.rb: methods for dealing with a group of elements as an Hpricot::Elements list. # * hpricot/inspect.rb: methods for displaying documents in a readable form. # If available, Nikolai's UTF-8 library will ease use of utf-8 documents. # See http://git.bitwi.se/ruby-character-encodings.git/. begin require 'encoding/character/utf-8' rescue LoadError end require 'hpricot_scan' require 'hpricot/tag' require 'hpricot/modules' require 'hpricot/traverse' require 'hpricot/inspect' require 'hpricot/parse' require 'hpricot/builder' hpricot-0.8.6/lib/hpricot/0000755000175000017500000000000011710073440014766 5ustar boutilboutilhpricot-0.8.6/lib/hpricot/xchar.rb0000644000175000017500000000607011710073440016423 0ustar boutilboutil#!/usr/bin/env ruby # The XChar library is provided courtesy of Sam Ruby (See # http://intertwingly.net/stories/2005/09/28/xchar.rb) # -------------------------------------------------------------------- ###################################################################### module Hpricot #################################################################### # XML Character converter, from Sam Ruby: # (see http://intertwingly.net/stories/2005/09/28/xchar.rb). # module XChar # :nodoc: # See # http://intertwingly.net/stories/2004/04/14/i18n.html#CleaningWindows # for details. 
CP1252 = { # :nodoc: 128 => 8364, # euro sign 130 => 8218, # single low-9 quotation mark 131 => 402, # latin small letter f with hook 132 => 8222, # double low-9 quotation mark 133 => 8230, # horizontal ellipsis 134 => 8224, # dagger 135 => 8225, # double dagger 136 => 710, # modifier letter circumflex accent 137 => 8240, # per mille sign 138 => 352, # latin capital letter s with caron 139 => 8249, # single left-pointing angle quotation mark 140 => 338, # latin capital ligature oe 142 => 381, # latin capital letter z with caron 145 => 8216, # left single quotation mark 146 => 8217, # right single quotation mark 147 => 8220, # left double quotation mark 148 => 8221, # right double quotation mark 149 => 8226, # bullet 150 => 8211, # en dash 151 => 8212, # em dash 152 => 732, # small tilde 153 => 8482, # trade mark sign 154 => 353, # latin small letter s with caron 155 => 8250, # single right-pointing angle quotation mark 156 => 339, # latin small ligature oe 158 => 382, # latin small letter z with caron 159 => 376, # latin capital letter y with diaeresis } # See http://www.w3.org/TR/REC-xml/#dt-chardata for details. PREDEFINED = { 34 => '"', # quotation mark 38 => '&', # ampersand 60 => '<', # left angle bracket 62 => '>' # right angle bracket } PREDEFINED_U = PREDEFINED.inject({}) { |hsh, (k, v)| hsh[v] = k; hsh } # See http://www.w3.org/TR/REC-xml/#charsets for details. VALID = [ 0x9, 0xA, 0xD, (0x20..0xD7FF), (0xE000..0xFFFD), (0x10000..0x10FFFF) ] end class << self # XML escaped version of chr def xchr(str) n = XChar::CP1252[str] || str case n when *XChar::VALID XChar::PREDEFINED[n] or (n<128 ? n.chr : "&##{n};") else '*' end end # XML escaped version of to_s def xs(str) str.to_s.unpack('U*').map {|n| xchr(n)}.join # ASCII, UTF-8 rescue str.to_s.unpack('C*').map {|n| xchr(n)}.join # ISO-8859-1, WIN-1252 end # XML unescape def uxs(str) str.to_s. gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || 63).chr }. # 63 = ?? 
(query char) gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }. gsub(/\&\#x([0-9a-fA-F]+);/) { [$1.to_i(16)].pack("U*") } end end end hpricot-0.8.6/lib/hpricot/traverse.rb0000644000175000017500000006114711710073440017157 0ustar boutilboutilrequire 'hpricot/elements' require 'uri' module Hpricot module Traverse # Is this object the enclosing HTML or XML document? def doc?() Doc::Trav === self end # Is this object an HTML or XML element? def elem?() Elem::Trav === self end # Is this object an HTML text node? def text?() Text::Trav === self end # Is this object an XML declaration? def xmldecl?() XMLDecl::Trav === self end # Is this object a doctype tag? def doctype?() DocType::Trav === self end # Is this object an XML processing instruction? def procins?() ProcIns::Trav === self end # Is this object a comment? def comment?() Comment::Trav === self end # Is this object a stranded end tag? def bogusetag?() BogusETag::Trav === self end # Parses an HTML string, making an HTML fragment based on # the options used to create the container document. def make(input = nil, &blk) if parent and parent.respond_to? :make parent.make(input, &blk) else Hpricot.make(input, &blk).children end end # Builds an HTML string from this node and its contents. # If you need to write to a stream, try calling output(io) # as a method on this object. def to_html output("") end alias_method :to_s, :to_html # Attempts to preserve the original HTML of the document, only # outputing new tags for elements which have changed. def to_original_html output("", :preserve => true) end def index(name) i = 0 return i if name == "*" children.each do |x| return i if (x.respond_to?(:name) and name == x.name) or (x.text? and name == "text()") i += 1 end if children -1 end # Puts together an array of neighboring nodes based on their proximity # to this node. So, for example, to get the next node, you could use # nodes_at(1). Or, to get the previous node, use nodes_at(1). 
# # This method also accepts ranges and sets of numbers. # # ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after # ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node # ele.nodes_at(0, 5..6) # the current node and two others def nodes_at(*pos) sib = parent.children i, si = 0, sib.index(self) pos.map! do |r| if r.is_a?(Range) and r.begin.is_a?(String) r = Range.new(parent.index(r.begin)-si, parent.index(r.end)-si, r.exclude_end?) end r end p pos Elements[* sib.select do |x| sel = case i - si when *pos true end i += 1 sel end ] end # Returns the node neighboring this node to the south: just below it. # This method includes text nodes and comments and such. def next sib = parent.children sib[sib.index(self) + 1] if parent end alias_method :next_node, :next # Returns to node neighboring this node to the north: just above it. # This method includes text nodes and comments and such. def previous sib = parent.children x = sib.index(self) - 1 sib[x] if sib and x >= 0 end alias_method :previous_node, :previous # Find all preceding nodes. def preceding sibs = parent.children si = sibs.index(self) return Elements[*sibs[0...si]] end # Find all nodes which follow the current one. def following sibs = parent.children si = sibs.index(self) + 1 return Elements[*sibs[si...sibs.length]] end # Adds elements immediately after this element, contained in the +html+ string. def after(html = nil, &blk) parent.insert_after(make(html, &blk), self) end # Adds elements immediately before this element, contained in the +html+ string. def before(html = nil, &blk) parent.insert_before(make(html, &blk), self) end # Replace this element and its contents with the nodes contained # in the +html+ string. def swap(html = nil, &blk) parent.altered! parent.replace_child(self, make(html, &blk)) end def get_subnode(*indexes) n = self indexes.each {|index| n = n.get_subnode_internal(index) } n end # Builds a string from the text contained in this node. 
All # HTML elements are removed. def to_plain_text if respond_to?(:children) and children children.map { |x| x.to_plain_text }.join.strip.gsub(/\n{2,}/, "\n\n") else "" end end # Builds a string from the text contained in this node. All # HTML elements are removed. def inner_text if respond_to?(:children) and children children.map { |x| x.inner_text }.join else "" end end alias_method :innerText, :inner_text # Builds an HTML string from the contents of this node. def html(inner = nil, &blk) if inner or blk altered! case inner when Array self.children = inner else self.children = make(inner, &blk) end reparent self.children else if respond_to?(:children) and children children.map { |x| x.output("") }.join else "" end end end alias_method :inner_html, :html alias_method :innerHTML, :inner_html # Inserts new contents into the current node, based on # the HTML contained in string +inner+. def inner_html=(inner) html(inner || []) end alias_method :innerHTML=, :inner_html= def reparent(nodes) return unless nodes altered! [*nodes].each { |e| e.parent = self } end private :reparent def clean_path(path) path.gsub(/^\s+|\s+$/, '') end # Builds a unique XPath string for this node, from the # root of the document containing it. def xpath if elem? and has_attribute? 'id' "//#{self.name}[@id='#{get_attribute('id')}']" else sim, id = 0, 0, 0 parent.children.each do |e| id = sim if e == self sim += 1 if e.pathname == self.pathname end if parent.children p = File.join(parent.xpath, self.pathname) p += "[#{id+1}]" if sim >= 2 p end end # Builds a unique CSS string for this node, from the # root of the document containing it. def css_path if elem? and has_attribute? 'id' "##{get_attribute('id')}" else sim, i, id = 0, 0, 0 parent.children.each do |e| id = sim if e == self sim += 1 if e.pathname == self.pathname end if parent.children p = parent.css_path p = p ? 
"#{p} > #{self.pathname}" : self.pathname p += ":nth(#{id})" if sim >= 2 p end end def node_position parent.children.index(self) end def position parent.children_of_type(self.pathname).index(self) end # Searches this node for all elements matching # the CSS or XPath +expr+. Returns an Elements array # containing the matching nodes. If +blk+ is given, it # is used to iterate through the matching set. def search(expr, &blk) if Range === expr return Elements.expand(at(expr.begin), at(expr.end), expr.exclude_end?) end last = nil nodes = [self] done = [] expr = expr.to_s hist = [] until expr.empty? expr = clean_path(expr) expr.gsub!(%r!^//!, '') case expr when %r!^/?\.\.! last = expr = $' nodes.map! { |node| node.parent } when %r!^[>/]\s*! last = expr = $' nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact] when %r!^\+! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[siblings.index(node)+1] end nodes.compact! when %r!^~! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[(siblings.index(node)+1)..-1] end nodes.flatten! when %r!^[|,]! last = expr = " #$'" nodes.shift if nodes.first == self done += nodes nodes = [self] else m = expr.match(%r!^([#.]?)([a-z0-9\\*_-]*)!i).to_a after = $' mt = after[%r!:[a-z0-9\\*_-]+!i, 0] oop = false if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]") after = $' m[2] += mt expr = after end if m[1] == '#' oid = get_element_by_id(m[2]) nodes = oid ? [oid] : [] expr = after else m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "." ret = [] nodes.each do |node| case m[2] when '*' node.traverse_element { |n| ret << n } else if node.respond_to? 
:get_elements_by_tag_name ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)] end end end nodes = ret end last = nil end hist << expr break if hist[-1] == hist[-2] nodes, expr = Elements.filter(nodes, expr) end nodes = done + nodes.flatten.uniq if blk nodes.each(&blk) self else Elements[*nodes] end end alias_method :/, :search # Find the first matching node for the CSS or XPath # +expr+ string. def at(expr) search(expr).first end alias_method :%, :at # +traverse_element+ traverses elements in the tree. # It yields elements in depth first order. # # If _names_ are empty, it yields all elements. # If non-empty _names_ are given, it should be list of universal names. # # A nested element is yielded in depth first order as follows. # # t = Hpricot('') # t.traverse_element("a", "c") {|e| p e} # # => # {elem {elem {emptyelem } } {emptyelem } } # {emptyelem } # {emptyelem } # # Universal names are specified as follows. # # t = Hpricot(<<'End') # # # # # End # t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e} # # => # {emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">} # {emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">} # def traverse_element(*names, &block) # :yields: element if names.empty? traverse_all_element(&block) else name_set = {} names.each {|n| name_set[n] = true } traverse_some_element(name_set, &block) end nil end # Find children of a given +tag_name+. # # ele.children_of_type('p') # #=> [...array of paragraphs...] # def children_of_type(tag_name) if respond_to? :children children.find_all do |x| x.respond_to?(:pathname) && x.pathname == tag_name end end end end module Container::Trav # Return all children of this node which can contain other # nodes. This is a good way to get all HTML elements which # aren't text, comment, doctype or processing instruction nodes. 
def containers children.grep(Container::Trav) end # Returns the container node neighboring this node to the south: just below it. # By "container" node, I mean: this method does not find text nodes or comments or cdata or any of that. # See Hpricot::Traverse#next_node if you need to hunt out all kinds of nodes. def next_sibling sib = parent.containers sib[sib.index(self) + 1] if parent end # Returns the container node neighboring this node to the north: just above it. # By "container" node, I mean: this method does not find text nodes or comments or cdata or any of that. # See Hpricot::Traverse#previous_node if you need to hunt out all kinds of nodes. def previous_sibling sib = parent.containers x = sib.index(self) - 1 sib[x] if sib and x >= 0 end # Find all preceding sibling elements. Like the other "sibling" methods, this weeds # out text and comment nodes. def preceding_siblings() sibs = parent.containers si = sibs.index(self) return Elements[*sibs[0...si]] end # Find sibling elements which follow the current one. Like the other "sibling" methods, this weeds # out text and comment nodes. def following_siblings() sibs = parent.containers si = sibs.index(self) + 1 return Elements[*sibs[si...sibs.length]] end # Puts together an array of neighboring sibling elements based on their proximity # to this element. # # This method accepts ranges and sets of numbers. # # ele.siblings_at(-3..-1, 1..3) # gets three elements before and three after # ele.siblings_at(1, 5, 7) # gets three elements at offsets below the current element # ele.siblings_at(0, 5..6) # the current element and two others # # Like the other "sibling" methods, this doesn't find text and comment nodes. # Use nodes_at to include those nodes. def siblings_at(*pos) sib = parent.containers i, si = 0, sib.index(self) Elements[* sib.select do |x| sel = case i - si when *pos true end i += 1 sel end ] end # Replace +old+, a child of the current node, with +new+ node. 
def replace_child(old, new) reparent new children[children.index(old), 1] = [*new] end # Insert +nodes+, an array of HTML elements or a single element, # before the node +ele+, a child of the current node. def insert_before(nodes, ele) case nodes when Array nodes.each { |n| insert_before(n, ele) } else reparent nodes children[children.index(ele) || 0, 0] = nodes end end # Insert +nodes+, an array of HTML elements or a single element, # after the node +ele+, a child of the current node. def insert_after(nodes, ele) case nodes when Array nodes.reverse_each { |n| insert_after(n, ele) } else reparent nodes idx = children.index(ele) children[idx ? idx + 1 : children.length, 0] = nodes end end # +each_child+ iterates over each child. def each_child(&block) # :yields: child_node children.each(&block) if children nil end # +each_child_with_index+ iterates over each child. def each_child_with_index(&block) # :yields: child_node, index children.each_with_index(&block) if children nil end # +find_element+ searches an element which universal name is specified by # the arguments. # It returns nil if not found. def find_element(*names) traverse_element(*names) {|e| return e } nil end # Returns a list of CSS classes to which this element belongs. def classes get_attribute('class').to_s.strip.split(/\s+/) end def get_element_by_id(id) traverse_all_element do |ele| if ele.elem? and eid = ele.get_attribute('id') return ele if eid.to_s == id end end nil end def get_elements_by_tag_name(*a) list = Elements[] a.delete("*") traverse_element(*a.map { |tag| [tag, "{http://www.w3.org/1999/xhtml}#{tag}"] }.flatten) do |e| list << e if e.elem? 
end list end def each_hyperlink_attribute traverse_element( '{http://www.w3.org/1999/xhtml}a', '{http://www.w3.org/1999/xhtml}area', '{http://www.w3.org/1999/xhtml}link', '{http://www.w3.org/1999/xhtml}img', '{http://www.w3.org/1999/xhtml}object', '{http://www.w3.org/1999/xhtml}q', '{http://www.w3.org/1999/xhtml}blockquote', '{http://www.w3.org/1999/xhtml}ins', '{http://www.w3.org/1999/xhtml}del', '{http://www.w3.org/1999/xhtml}form', '{http://www.w3.org/1999/xhtml}input', '{http://www.w3.org/1999/xhtml}head', '{http://www.w3.org/1999/xhtml}base', '{http://www.w3.org/1999/xhtml}script') {|elem| case elem.name when %r{\{http://www.w3.org/1999/xhtml\}(?:base|a|area|link)\z}i attrs = ['href'] when %r{\{http://www.w3.org/1999/xhtml\}(?:img)\z}i attrs = ['src', 'longdesc', 'usemap'] when %r{\{http://www.w3.org/1999/xhtml\}(?:object)\z}i attrs = ['classid', 'codebase', 'data', 'usemap'] when %r{\{http://www.w3.org/1999/xhtml\}(?:q|blockquote|ins|del)\z}i attrs = ['cite'] when %r{\{http://www.w3.org/1999/xhtml\}(?:form)\z}i attrs = ['action'] when %r{\{http://www.w3.org/1999/xhtml\}(?:input)\z}i attrs = ['src', 'usemap'] when %r{\{http://www.w3.org/1999/xhtml\}(?:head)\z}i attrs = ['profile'] when %r{\{http://www.w3.org/1999/xhtml\}(?:script)\z}i attrs = ['src', 'for'] end attrs.each {|attr| if hyperlink = elem.get_attribute(attr) yield elem, attr, hyperlink end } } end private :each_hyperlink_attribute # +each_hyperlink_uri+ traverses hyperlinks such as HTML href attribute # of A element. # # It yields Hpricot::Text and URI for each hyperlink. # # The URI objects are created with a base URI which is given by # HTML BASE element or the argument ((|base_uri|)). # +each_hyperlink_uri+ doesn't yields href of the BASE element. 
def each_hyperlink_uri(base_uri=nil) # :yields: hyperlink, uri base_uri = URI.parse(base_uri) if String === base_uri links = [] each_hyperlink_attribute {|elem, attr, hyperlink| if %r{\{http://www.w3.org/1999/xhtml\}(?:base)\z}i =~ elem.name base_uri = URI.parse(hyperlink.to_s) else links << hyperlink end } if base_uri links.each {|hyperlink| yield hyperlink, base_uri + hyperlink.to_s } else links.each {|hyperlink| yield hyperlink, URI.parse(hyperlink.to_s) } end end # +each_hyperlink+ traverses hyperlinks such as HTML href attribute # of A element. # # It yields Hpricot::Text. # # Note that +each_hyperlink+ yields HTML href attribute of BASE element. def each_hyperlink # :yields: text links = [] each_hyperlink_attribute {|elem, attr, hyperlink| yield hyperlink } end # +each_uri+ traverses hyperlinks such as HTML href attribute # of A element. # # It yields URI for each hyperlink. # # The URI objects are created with a base URI which is given by # HTML BASE element or the argument ((|base_uri|)). def each_uri(base_uri=nil) # :yields: URI each_hyperlink_uri(base_uri) {|hyperlink, uri| yield uri } end end # :stopdoc: module Doc::Trav def traverse_all_element(&block) children.each {|c| c.traverse_all_element(&block) } if children end def xpath "/" end def css_path nil end end module Elem::Trav def traverse_all_element(&block) yield self children.each {|c| c.traverse_all_element(&block) } if children end end module Leaf::Trav def traverse_all_element yield self end end module Doc::Trav def traverse_some_element(name_set, &block) children.each {|c| c.traverse_some_element(name_set, &block) } if children end end module Elem::Trav def traverse_some_element(name_set, &block) yield self if name_set.include? 
self.name children.each {|c| c.traverse_some_element(name_set, &block) } if children end end module Leaf::Trav def traverse_some_element(name_set) end end # :startdoc: module Traverse # +traverse_text+ traverses texts in the tree def traverse_text(&block) # :yields: text traverse_text_internal(&block) nil end end # :stopdoc: module Container::Trav def traverse_text_internal(&block) each_child {|c| c.traverse_text_internal(&block) } end end module Leaf::Trav def traverse_text_internal end end module Text::Trav def traverse_text_internal yield self end end # :startdoc: module Container::Trav # +filter+ rebuilds the tree without some components. # # node.filter {|descendant_node| predicate } -> node # loc.filter {|descendant_loc| predicate } -> node # # +filter+ yields each node except top node. # If given block returns false, corresponding node is dropped. # If given block returns true, corresponding node is retained and # inner nodes are examined. # # +filter+ returns an node. # It doesn't return location object even if self is location object. # def filter(&block) subst = {} each_child_with_index {|descendant, i| if yield descendant if descendant.elem? subst[i] = descendant.filter(&block) else subst[i] = descendant end else subst[i] = nil end } to_node.subst_subnode(subst) end end module Doc::Trav # +title+ searches title and return it as a text. # It returns nil if not found. # # +title+ searchs following information. # # - ... in HTML # - ... in RSS def title e = find_element('title', '{http://www.w3.org/1999/xhtml}title', '{http://purl.org/rss/1.0/}title', '{http://my.netscape.com/rdf/simple/0.9/}title') e && e.extract_text end # +author+ searches author and return it as a text. # It returns nil if not found. # # +author+ searchs following information. 
# # - in HTML # - in HTML # - author-name in RSS # - author-name in RSS def author traverse_element('meta', '{http://www.w3.org/1999/xhtml}meta') {|e| begin next unless e.fetch_attr('name').downcase == 'author' author = e.fetch_attribute('content').strip return author if !author.empty? rescue IndexError end } traverse_element('link', '{http://www.w3.org/1999/xhtml}link') {|e| begin next unless e.fetch_attr('rev').downcase == 'made' author = e.fetch_attribute('title').strip return author if !author.empty? rescue IndexError end } if channel = find_element('{http://purl.org/rss/1.0/}channel') channel.traverse_element('{http://purl.org/dc/elements/1.1/}creator') {|e| begin author = e.extract_text.strip return author if !author.empty? rescue IndexError end } channel.traverse_element('{http://purl.org/dc/elements/1.1/}publisher') {|e| begin author = e.extract_text.strip return author if !author.empty? rescue IndexError end } end nil end end module Doc::Trav def root es = [] children.each {|c| es << c if c.elem? } if children raise Hpricot::Error, "no element" if es.empty? raise Hpricot::Error, "multiple top elements" if 1 < es.length es[0] end end module Elem::Trav def has_attribute?(name) self.raw_attributes && self.raw_attributes.has_key?(name.to_s) end def get_attribute(name) a = self.raw_attributes && self.raw_attributes[name.to_s] a = Hpricot.uxs(a) if a a end alias_method :[], :get_attribute def set_attribute(name, val) altered! self.raw_attributes ||= {} self.raw_attributes[name.to_s] = val.fast_xs end alias_method :[]=, :set_attribute def remove_attribute(name) name = name.to_s if has_attribute? name altered! self.raw_attributes.delete(name) end end end end hpricot-0.8.6/lib/hpricot/tags.rb0000644000175000017500000001447611710073440016265 0ustar boutilboutilmodule Hpricot FORM_TAGS = [ :form, :input, :select, :textarea ] SELF_CLOSING_TAGS = [ :base, :meta, :link, :hr, :br, :param, :img, :area, :input, :col ] # Common sets of attributes. 
AttrCore = [:id, :class, :style, :title] AttrI18n = [:lang, 'xml:lang'.intern, :dir] AttrEvents = [:onclick, :ondblclick, :onmousedown, :onmouseup, :onmouseover, :onmousemove, :onmouseout, :onkeypress, :onkeydown, :onkeyup] AttrFocus = [:accesskey, :tabindex, :onfocus, :onblur] AttrHAlign = [:align, :char, :charoff] AttrVAlign = [:valign] Attrs = AttrCore + AttrI18n + AttrEvents # All the tags and attributes from XHTML 1.0 Strict class XHTMLStrict class << self attr_accessor :tags, :tagset, :forms, :self_closing, :doctype end @doctype = ["-//W3C//DTD XHTML 1.0 Strict//EN", "DTD/xhtml1-strict.dtd"] @tagset = { :html => AttrI18n + [:id, :xmlns], :head => AttrI18n + [:id, :profile], :title => AttrI18n + [:id], :base => [:href, :id], :meta => AttrI18n + [:id, :http, :name, :content, :scheme, 'http-equiv'.intern], :link => Attrs + [:charset, :href, :hreflang, :type, :rel, :rev, :media], :style => AttrI18n + [:id, :type, :media, :title, 'xml:space'.intern], :script => [:id, :charset, :type, :src, :defer, 'xml:space'.intern], :noscript => Attrs, :body => Attrs + [:onload, :onunload], :div => Attrs, :p => Attrs, :ul => Attrs, :ol => Attrs, :li => Attrs, :dl => Attrs, :dt => Attrs, :dd => Attrs, :address => Attrs, :hr => Attrs, :pre => Attrs + ['xml:space'.intern], :blockquote => Attrs + [:cite], :ins => Attrs + [:cite, :datetime], :del => Attrs + [:cite, :datetime], :a => Attrs + AttrFocus + [:charset, :type, :name, :href, :hreflang, :rel, :rev, :shape, :coords], :span => Attrs, :bdo => AttrCore + AttrEvents + [:lang, 'xml:lang'.intern, :dir], :br => AttrCore, :em => Attrs, :strong => Attrs, :dfn => Attrs, :code => Attrs, :samp => Attrs, :kbd => Attrs, :var => Attrs, :cite => Attrs, :abbr => Attrs, :acronym => Attrs, :q => Attrs + [:cite], :sub => Attrs, :sup => Attrs, :tt => Attrs, :i => Attrs, :b => Attrs, :big => Attrs, :small => Attrs, :object => Attrs + [:declare, :classid, :codebase, :data, :type, :codetype, :archive, :standby, :height, :width, :usemap, :name, 
:tabindex], :param => [:id, :name, :value, :valuetype, :type], :img => Attrs + [:src, :alt, :longdesc, :height, :width, :usemap, :ismap], :map => AttrI18n + AttrEvents + [:id, :class, :style, :title, :name], :area => Attrs + AttrFocus + [:shape, :coords, :href, :nohref, :alt], :form => Attrs + [:action, :method, :enctype, :onsubmit, :onreset, :accept, :accept], :label => Attrs + [:for, :accesskey, :onfocus, :onblur], :input => Attrs + AttrFocus + [:type, :name, :value, :checked, :disabled, :readonly, :size, :maxlength, :src, :alt, :usemap, :onselect, :onchange, :accept], :select => Attrs + [:name, :size, :multiple, :disabled, :tabindex, :onfocus, :onblur, :onchange], :optgroup => Attrs + [:disabled, :label], :option => Attrs + [:selected, :disabled, :label, :value], :textarea => Attrs + AttrFocus + [:name, :rows, :cols, :disabled, :readonly, :onselect, :onchange], :fieldset => Attrs, :legend => Attrs + [:accesskey], :button => Attrs + AttrFocus + [:name, :value, :type, :disabled], :table => Attrs + [:summary, :width, :border, :frame, :rules, :cellspacing, :cellpadding], :caption => Attrs, :colgroup => Attrs + AttrHAlign + AttrVAlign + [:span, :width], :col => Attrs + AttrHAlign + AttrVAlign + [:span, :width], :thead => Attrs + AttrHAlign + AttrVAlign, :tfoot => Attrs + AttrHAlign + AttrVAlign, :tbody => Attrs + AttrHAlign + AttrVAlign, :tr => Attrs + AttrHAlign + AttrVAlign, :th => Attrs + AttrHAlign + AttrVAlign + [:abbr, :axis, :headers, :scope, :rowspan, :colspan], :td => Attrs + AttrHAlign + AttrVAlign + [:abbr, :axis, :headers, :scope, :rowspan, :colspan], :h1 => Attrs, :h2 => Attrs, :h3 => Attrs, :h4 => Attrs, :h5 => Attrs, :h6 => Attrs } @tags = @tagset.keys @forms = @tags & FORM_TAGS @self_closing = @tags & SELF_CLOSING_TAGS end # Additional tags found in XHTML 1.0 Transitional class XHTMLTransitional class << self attr_accessor :tags, :tagset, :forms, :self_closing, :doctype end @doctype = ["-//W3C//DTD XHTML 1.0 Transitional//EN", 
"DTD/xhtml1-transitional.dtd"] @tagset = XHTMLStrict.tagset.merge \ :strike => Attrs, :center => Attrs, :dir => Attrs + [:compact], :noframes => Attrs, :basefont => [:id, :size, :color, :face], :u => Attrs, :menu => Attrs + [:compact], :iframe => AttrCore + [:longdesc, :name, :src, :frameborder, :marginwidth, :marginheight, :scrolling, :align, :height, :width], :font => AttrCore + AttrI18n + [:size, :color, :face], :s => Attrs, :applet => AttrCore + [:codebase, :archive, :code, :object, :alt, :name, :width, :height, :align, :hspace, :vspace], :isindex => AttrCore + AttrI18n + [:prompt] # Additional attributes found in XHTML 1.0 Transitional { :script => [:language], :a => [:target], :td => [:bgcolor, :nowrap, :width, :height], :p => [:align], :h5 => [:align], :h3 => [:align], :li => [:type, :value], :div => [:align], :pre => [:width], :body => [:background, :bgcolor, :text, :link, :vlink, :alink], :ol => [:type, :compact, :start], :h4 => [:align], :h2 => [:align], :object => [:align, :border, :hspace, :vspace], :img => [:name, :align, :border, :hspace, :vspace], :link => [:target], :legend => [:align], :dl => [:compact], :input => [:align], :h6 => [:align], :hr => [:align, :noshade, :size, :width], :base => [:target], :ul => [:type, :compact], :br => [:clear], :form => [:name, :target], :area => [:target], :h1 => [:align] }.each do |k, v| @tagset[k] += v end @tags = @tagset.keys @forms = @tags & FORM_TAGS @self_closing = @tags & SELF_CLOSING_TAGS end end hpricot-0.8.6/lib/hpricot/tag.rb0000644000175000017500000001176711710073440016102 0ustar boutilboutilmodule Hpricot # :stopdoc: class Doc def output(out, opts = {}) children.each do |n| n.output(out, opts) end if children out end def make(input = nil, &blk) Hpricot.make(input, @options, &blk).children end def altered!; end def inspect_tree children.map { |x| x.inspect_tree }.join if children end end module Node def html_quote(str) "\"" + str.gsub('"', '\\"') + "\"" end def clear_raw; end def if_output(opts) if 
opts[:preserve] and not raw_string.nil?
        raw_string
      else
        yield opts
      end
    end

    def pathname; self.name end

    # Marks the node as changed by calling clear_raw.
    def altered!
      clear_raw
    end

    # One line: +depth+ spaces, the lowercased unqualified class name, newline.
    def inspect_tree(depth = 0)
      %{#{" " * depth}} + self.class.name.split(/::/).last.downcase + "\n"
    end
  end

  # Hash-like view over an element's raw_attributes: values are passed
  # through Hpricot.uxs on read and stored as +fast_xs+ output on write.
  class Attributes
    attr_accessor :element
    def initialize e
      @element = e
    end
    # Looks +k+ up as given (no string conversion here).
    def [] k
      Hpricot.uxs((@element.raw_attributes || {})[k])
    end
    # Creates the attribute hash on first write.
    def []= k, v
      (@element.raw_attributes ||= {})[k] = v.fast_xs
    end
    # Returns a new Hash with every value passed through Hpricot.uxs
    # (an empty Hash when the element has no attributes).
    def to_hash
      if @element.raw_attributes
        @element.raw_attributes.inject({}) do |hsh, (k, v)|
          hsh[k] = Hpricot.uxs(v)
          hsh
        end
      else
        {}
      end
    end
    def to_s
      to_hash.to_s
    end
    def inspect
      to_hash.inspect
    end
  end

  class Elem
    def initialize tag, attrs = nil, children = nil, etag = nil
      self.name, self.raw_attributes, self.children, self.etag =
        tag, attrs, children, etag
    end
    # True when there are no children at all.
    def empty?; children.nil? or children.empty? end
    # Returns a fresh Attributes wrapper on every call.
    def attributes
      Attributes.new self
    end
    # Plain-text rendering with special cases for br, p, a[href] and
    # img[src]; every other element defers to super.
    def to_plain_text
      if self.name == 'br'
        "\n"
      elsif self.name == 'p'
        "\n\n" + super + "\n\n"
      elsif self.name == 'a' and self.has_attribute?('href')
        "#{super} [#{self['href']}]"
      elsif self.name == 'img' and self.has_attribute?('src')
        "[img:#{self['src']}]"
      else
        super
      end
    end
    def pathname; self.name end
    # Appends the start tag (with " /" when the element is empty and has
    # no end tag), then the children, then the end-tag part, and returns
    # +out+.
    def output(out, opts = {})
      out <<
        if_output(opts) do
          "<#{name}#{attributes_as_html}" +
            ((empty? and not etag) ? " /" : "") +
            ">"
        end
      if children
        children.each { |n| n.output(out, opts) }
      end
      if opts[:preserve]
        out << etag if etag
      elsif etag or !empty?
        # NOTE(review): this literal is empty in this copy of the file, so
        # nothing is appended here; confirm against the upstream source
        # whether closing-tag text was lost.
        out << ""
      end
      out
    end
    # Returns the attributes as ' name="value"' pairs joined together; an
    # attribute with a falsy value is written as the bare name. Returns nil
    # when raw_attributes is unset.
    def attributes_as_html
      if raw_attributes
        raw_attributes.map do |aname, aval|
          " #{aname}" +
            (aval ? "=#{html_quote aval}" : "")
        end.join
      end
    end
    # This element's name on one line, followed by the children's trees
    # indented one level deeper.
    def inspect_tree(depth = 0)
      %{#{" " * depth}} + name + "\n" +
        (children ? children.map { |x| x.inspect_tree(depth + 1) }.join : "")
    end
  end

  # NOTE(review): in this class and in ETag, CData, XMLDecl, DocType,
  # ProcIns and Comment below, the string literals inside output and
  # raw_string are empty in this copy of the file. Confirm against the
  # upstream source which of them are meant to be empty.
  class BogusETag
    def initialize name; self.name = name end
    def output(out, opts = {})
      out <<
        if_output(opts) do
          ""
        end
    end
  end

  class ETag < BogusETag
    def output(out, opts = {}); out << if_output(opts) { '' }; end
  end

  class Text
    def initialize content; self.content = content end
    def pathname; "text()" end
    # The content passed through Hpricot.uxs.
    def to_s
      Hpricot.uxs(content)
    end
    alias_method :inner_text, :to_s
    alias_method :to_plain_text, :to_s
    # Appends +str+ to the content in place.
    def << str; self.content << str end
    # Unlike to_s, writes the content without passing it through Hpricot.uxs.
    def output(out, opts = {})
      out <<
        if_output(opts) do
          content.to_s
        end
    end
  end

  class CData
    def initialize content; self.content = content end
    alias_method :to_s, :content
    alias_method :to_plain_text, :content
    alias_method :inner_text, :content
    def raw_string; "" end
    def output(out, opts = {})
      out <<
        if_output(opts) do
          ""
        end
    end
  end

  class XMLDecl
    def pathname; "xmldecl()" end
    def output(out, opts = {})
      out <<
        if_output(opts) do
          ""
        end
    end
  end

  class DocType
    def initialize target, pub, sys
      self.target, self.public_id, self.system_id = target, pub, sys
    end
    def pathname; "doctype()" end
    def output(out, opts = {})
      out <<
        if_output(opts) do
          ""
        end
    end
  end

  class ProcIns
    def pathname; "procins()" end
    # The raw string is whatever output writes into a fresh string.
    def raw_string; output("") end
    def output(out, opts = {})
      out <<
        if_output(opts) do
          ""
        end
    end
  end

  class Comment
    def pathname; "comment()" end
    def raw_string; "" end
    def output(out, opts = {})
      out <<
        if_output(opts) do
          ""
        end
    end
  end

  # :startdoc:
end
hpricot-0.8.6/lib/hpricot/parse.rb0000644000175000017500000000166211710073440016432 0ustar boutilboutil
require 'hpricot/htmlinfo'

# Shorthand for Hpricot.make(input, opts, &blk).
def Hpricot(input = nil, opts = {}, &blk)
  Hpricot.make(input, opts, &blk)
end

module Hpricot
  # Exception class used for any errors related to deficiencies in the system when
  # handling the character encodings of a document.
  class EncodingError < StandardError; end

  # Hpricot.parse parses input and returns a document tree
  # represented by Hpricot::Doc.
def Hpricot.parse(input = nil, opts = {}, &blk) make(input, opts, &blk) end # Hpricot::XML parses input, disregarding all the HTML rules # and returning a document tree. def Hpricot.XML(input = nil, opts = {}, &blk) opts.merge! :xml => true make(input, opts, &blk) end # :stopdoc: def Hpricot.make(input = nil, opts = {}, &blk) if blk doc = Hpricot.build(&blk) doc.instance_variable_set("@options", opts) doc else Hpricot.scan(input, opts) end end # :startdoc: end hpricot-0.8.6/lib/hpricot/modules.rb0000644000175000017500000000276211710073440016772 0ustar boutilboutilmodule Hpricot class Name; include Hpricot end class Context; include Hpricot end # :stopdoc: module Tag; include Hpricot end class ETag; include Tag end # :startdoc: module Node; include Hpricot end class ETag; include Node end module Container; include Node end class Doc; include Container end class Elem; include Container end module Leaf; include Node end class CData; include Leaf end class Text; include Leaf end class XMLDecl; include Leaf end class DocType; include Leaf end class ProcIns; include Leaf end class Comment; include Leaf end class BogusETag; include Leaf end module Traverse end module Container::Trav; include Traverse end module Leaf::Trav; include Traverse end class Doc; module Trav; include Container::Trav end; include Trav end class Elem; module Trav; include Container::Trav end; include Trav end class CData; module Trav; include Leaf::Trav end; include Trav end class Text; module Trav; include Leaf::Trav end; include Trav end class XMLDecl; module Trav; include Leaf::Trav end; include Trav end class DocType; module Trav; include Leaf::Trav end; include Trav end class ProcIns; module Trav; include Leaf::Trav end; include Trav end class Comment; module Trav; include Leaf::Trav end; include Trav end class BogusETag; module Trav; include Leaf::Trav end; include Trav end class Error < StandardError; end end 
hpricot-0.8.6/lib/hpricot/inspect.rb0000644000175000017500000000406311710073440016763 0ustar boutilboutilrequire 'pp' module Hpricot # :stopdoc: class Elements def pretty_print(q) q.object_group(self) { super } end alias inspect pretty_print_inspect end class Doc def pretty_print(q) q.object_group(self) { children.each {|elt| q.breakable; q.pp elt } if children } end alias inspect pretty_print_inspect end module Leaf def pretty_print(q) q.group(1, '{', '}') { q.text self.class.name.sub(/.*::/,'').downcase if rs = raw_string rs.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) {|line| q.breakable q.pp line } elsif self.respond_to? :to_s q.breakable q.text self.to_s end } end alias inspect pretty_print_inspect end class Elem def pretty_print(q) if empty? q.group(1, '{emptyelem', '}') { q.breakable; pretty_print_stag q } else q.group(1, "{elem", "}") { q.breakable; pretty_print_stag q if children children.each {|elt| q.breakable; q.pp elt } end if etag q.breakable; q.text etag end } end end def pretty_print_stag(q) q.group(1, '<', '>') { q.text name if raw_attributes raw_attributes.each {|n, t| q.breakable if t q.text "#{n}=\"#{Hpricot.uxs(t)}\"" else q.text n end } end } end alias inspect pretty_print_inspect end class ETag def pretty_print(q) q.group(1, '') { q.text name } end alias inspect pretty_print_inspect end class Text def pretty_print(q) q.text content.dump end end class BogusETag def pretty_print(q) q.group(1, '{', '}') { q.text self.class.name.sub(/.*::/,'').downcase if rs = raw_string q.breakable q.text rs else q.text "" end } end end # :startdoc: end hpricot-0.8.6/lib/hpricot/htmlinfo.rb0000644000175000017500000011145311710073440017140 0ustar boutilboutilmodule Hpricot # The code below is auto-generated. Don't edit manually. 
# :stopdoc: NamedCharacters = {"AElig"=>198, "Aacute"=>193, "Acirc"=>194, "Agrave"=>192, "Alpha"=>913, "Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199, "Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201, "Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203, "Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921, "Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925, "OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937, "Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934, "Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931, "THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219, "Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221, "Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180, "aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38, "and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776, "atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166, "bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162, "chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169, "crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224, "darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247, "eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195, "ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240, "euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704, "frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947, "ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829, "hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236, "image"=>8465, "infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191, "isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955, "lang"=>9001, "laquo"=>171, "larr"=>8592, 
"lceil"=>8968, "ldquo"=>8220, "le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206, "lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212, "micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711, "nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713, "nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244, "oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959, "oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248, "otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706, "permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982, "plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733, "psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002, "raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476, "reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250, "rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167, "shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824, "sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178, "sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756, "theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732, "times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593, "ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965, "uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165, "yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204} NamedCharactersPattern = 
/\A(?-mix:AElig|Aacute|Acirc|Agrave|Alpha|Aring|Atilde|Auml|Beta|Ccedil|Chi|Dagger|Delta|ETH|Eacute|Ecirc|Egrave|Epsilon|Eta|Euml|Gamma|Iacute|Icirc|Igrave|Iota|Iuml|Kappa|Lambda|Mu|Ntilde|Nu|OElig|Oacute|Ocirc|Ograve|Omega|Omicron|Oslash|Otilde|Ouml|Phi|Pi|Prime|Psi|Rho|Scaron|Sigma|THORN|Tau|Theta|Uacute|Ucirc|Ugrave|Upsilon|Uuml|Xi|Yacute|Yuml|Zeta|aacute|acirc|acute|aelig|agrave|alefsym|alpha|amp|and|ang|apos|aring|asymp|atilde|auml|bdquo|beta|brvbar|bull|cap|ccedil|cedil|cent|chi|circ|clubs|cong|copy|crarr|cup|curren|dArr|dagger|darr|deg|delta|diams|divide|eacute|ecirc|egrave|empty|emsp|ensp|epsilon|equiv|eta|eth|euml|euro|exist|fnof|forall|frac12|frac14|frac34|frasl|gamma|ge|gt|hArr|harr|hearts|hellip|iacute|icirc|iexcl|igrave|image|infin|int|iota|iquest|isin|iuml|kappa|lArr|lambda|lang|laquo|larr|lceil|ldquo|le|lfloor|lowast|loz|lrm|lsaquo|lsquo|lt|macr|mdash|micro|middot|minus|mu|nabla|nbsp|ndash|ne|ni|not|notin|nsub|ntilde|nu|oacute|ocirc|oelig|ograve|oline|omega|omicron|oplus|or|ordf|ordm|oslash|otilde|otimes|ouml|para|part|permil|perp|phi|pi|piv|plusmn|pound|prime|prod|prop|psi|quot|rArr|radic|rang|raquo|rarr|rceil|rdquo|real|reg|rfloor|rho|rlm|rsaquo|rsquo|sbquo|scaron|sdot|sect|shy|sigma|sigmaf|sim|spades|sub|sube|sum|sup|sup1|sup2|sup3|supe|szlig|tau|there4|theta|thetasym|thinsp|thorn|tilde|times|trade|uArr|uacute|uarr|ucirc|ugrave|uml|upsih|upsilon|uuml|weierp|xi|yacute|yen|yuml|zeta|zwj|zwnj)\z/ ElementContent = {"h6"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "object"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", 
"h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "dl"=>["dd", "dt"], "p"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "acronym"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "code"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "ul"=>["li"], "tt"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "label"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "form"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", 
"cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "q"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "thead"=>["tr"], "area"=>:EMPTY, "td"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "title"=>[], "dir"=>["li"], "s"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "ol"=>["li"], "hr"=>:EMPTY, "applet"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q", "s", "samp", "script", 
"select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "table"=>["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"], "legend"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "cite"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "a"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "html"=> ["a", "abbr", "acronym", "address", "applet", "b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "title", "tt", "u", "ul", "var"], "u"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "blockquote"=> ["a", "abbr", 
"acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "center"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "b"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "base"=>:EMPTY, "th"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "link"=>:EMPTY, "var"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", 
"label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "samp"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "div"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "textarea"=>[], "pre"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "head"=>["base", "isindex", "title"], "span"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "br"=>:EMPTY, "script"=>:CDATA, "noframes"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", 
"noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "style"=>:CDATA, "meta"=>:EMPTY, "dt"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "option"=>[], "kbd"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "big"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "tfoot"=>["tr"], "sup"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "bdo"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "isindex"=>:EMPTY, "dfn"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", 
"map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "fieldset"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "legend", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "em"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "font"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "tbody"=>["tr"], "noscript"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "li"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", 
"form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "col"=>:EMPTY, "small"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "dd"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "i"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "menu"=>["li"], "strong"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "basefont"=>:EMPTY, "img"=>:EMPTY, "optgroup"=>["option"], "map"=> ["address", "area", "blockquote", "center", "dir", "div", "dl", "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", 
"noframes", "noscript", "ol", "p", "pre", "table", "ul"], "h1"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "address"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "p", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "sub"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "param"=>:EMPTY, "input"=>:EMPTY, "h2"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "abbr"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "h3"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "strike"=> ["a", "abbr", 
"acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "body"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "ins"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "button"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "h4"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", 
"s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "select"=>["optgroup", "option"], "caption"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "colgroup"=>["col"], "tr"=>["td", "th"], "del"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "h5"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "iframe"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"]} ElementContent.keys.each do |k| v = ElementContent[k] if v.is_a? 
Array ElementContent[k] = v.inject({}) do |h, name| h[name.hash] = true h end end end ElementInclusions = {"head"=>["link", "meta", "object", "script", "style" , "noscript"], "body"=>["del", "ins"]} ElementInclusions.each do |k, v| v.each do |name| ElementContent[k][name.hash] = :allow end end ElementExclusions = {"button"=> ["a", "button", "fieldset", "form", "iframe", "input", "isindex", "label", "select", "textarea"], "a"=>["a"], "dir"=> ["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes", "noscript", "ol", "p", "pre", "table", "ul"], "title"=>["link", "meta", "object", "script", "style"], "pre"=> ["applet", "basefont", "big", "font", "img", "object", "small", "sub", "sup"], "form"=>["form"], "menu"=> ["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes", "noscript", "ol", "p", "pre", "table", "ul"], "label"=>["label"]} ElementExclusions.each do |k, v| v.each do |name| ElementContent[k][name.hash] = :deny end end OmittedAttrName = {"h6"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "object"=> {"bottom"=>"align", "declare"=>"declare", "left"=>"align", "ltr"=>"dir", "middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"}, "dl"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"}, "p"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "acronym"=>{"ltr"=>"dir", "rtl"=>"dir"}, "code"=>{"ltr"=>"dir", "rtl"=>"dir"}, "ul"=> {"circle"=>"type", "compact"=>"compact", "disc"=>"type", "ltr"=>"dir", "rtl"=>"dir", "square"=>"type"}, "tt"=>{"ltr"=>"dir", "rtl"=>"dir"}, "label"=>{"ltr"=>"dir", "rtl"=>"dir"}, "form"=>{"get"=>"method", "ltr"=>"dir", "post"=>"method", "rtl"=>"dir"}, "q"=>{"ltr"=>"dir", "rtl"=>"dir"}, "thead"=> {"baseline"=>"valign", "bottom"=>"valign", 
"center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "area"=> {"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "nohref"=>"nohref", "poly"=>"shape", "rect"=>"shape", "rtl"=>"dir"}, "td"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap", "right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir", "top"=>"valign"}, "title"=>{"ltr"=>"dir", "rtl"=>"dir"}, "dir"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"}, "s"=>{"ltr"=>"dir", "rtl"=>"dir"}, "ol"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"}, "hr"=> {"center"=>"align", "left"=>"align", "ltr"=>"dir", "noshade"=>"noshade", "right"=>"align", "rtl"=>"dir"}, "applet"=> {"bottom"=>"align", "left"=>"align", "middle"=>"align", "right"=>"align", "top"=>"align"}, "table"=> {"above"=>"frame", "all"=>"rules", "below"=>"frame", "border"=>"frame", "box"=>"frame", "center"=>"align", "cols"=>"rules", "groups"=>"rules", "hsides"=>"frame", "left"=>"align", "lhs"=>"frame", "ltr"=>"dir", "none"=>"rules", "rhs"=>"frame", "right"=>"align", "rows"=>"rules", "rtl"=>"dir", "void"=>"frame", "vsides"=>"frame"}, "legend"=> {"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir", "top"=>"align"}, "cite"=>{"ltr"=>"dir", "rtl"=>"dir"}, "a"=> {"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "poly"=>"shape", "rect"=>"shape", "rtl"=>"dir"}, "html"=>{"ltr"=>"dir", "rtl"=>"dir"}, "u"=>{"ltr"=>"dir", "rtl"=>"dir"}, "blockquote"=>{"ltr"=>"dir", "rtl"=>"dir"}, "center"=>{"ltr"=>"dir", "rtl"=>"dir"}, "b"=>{"ltr"=>"dir", "rtl"=>"dir"}, "th"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap", 
"right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir", "top"=>"valign"}, "link"=>{"ltr"=>"dir", "rtl"=>"dir"}, "var"=>{"ltr"=>"dir", "rtl"=>"dir"}, "samp"=>{"ltr"=>"dir", "rtl"=>"dir"}, "div"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "textarea"=> {"disabled"=>"disabled", "ltr"=>"dir", "readonly"=>"readonly", "rtl"=>"dir"}, "pre"=>{"ltr"=>"dir", "rtl"=>"dir"}, "head"=>{"ltr"=>"dir", "rtl"=>"dir"}, "span"=>{"ltr"=>"dir", "rtl"=>"dir"}, "br"=>{"all"=>"clear", "left"=>"clear", "none"=>"clear", "right"=>"clear"}, "script"=>{"defer"=>"defer"}, "noframes"=>{"ltr"=>"dir", "rtl"=>"dir"}, "style"=>{"ltr"=>"dir", "rtl"=>"dir"}, "meta"=>{"ltr"=>"dir", "rtl"=>"dir"}, "dt"=>{"ltr"=>"dir", "rtl"=>"dir"}, "option"=> {"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir", "selected"=>"selected"}, "kbd"=>{"ltr"=>"dir", "rtl"=>"dir"}, "big"=>{"ltr"=>"dir", "rtl"=>"dir"}, "tfoot"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "sup"=>{"ltr"=>"dir", "rtl"=>"dir"}, "bdo"=>{"ltr"=>"dir", "rtl"=>"dir"}, "isindex"=>{"ltr"=>"dir", "rtl"=>"dir"}, "dfn"=>{"ltr"=>"dir", "rtl"=>"dir"}, "fieldset"=>{"ltr"=>"dir", "rtl"=>"dir"}, "em"=>{"ltr"=>"dir", "rtl"=>"dir"}, "font"=>{"ltr"=>"dir", "rtl"=>"dir"}, "tbody"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "noscript"=>{"ltr"=>"dir", "rtl"=>"dir"}, "li"=>{"ltr"=>"dir", "rtl"=>"dir"}, "col"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "small"=>{"ltr"=>"dir", "rtl"=>"dir"}, "dd"=>{"ltr"=>"dir", "rtl"=>"dir"}, 
"i"=>{"ltr"=>"dir", "rtl"=>"dir"}, "menu"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"}, "strong"=>{"ltr"=>"dir", "rtl"=>"dir"}, "img"=> {"bottom"=>"align", "ismap"=>"ismap", "left"=>"align", "ltr"=>"dir", "middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"}, "optgroup"=>{"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir"}, "map"=>{"ltr"=>"dir", "rtl"=>"dir"}, "address"=>{"ltr"=>"dir", "rtl"=>"dir"}, "h1"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "sub"=>{"ltr"=>"dir", "rtl"=>"dir"}, "param"=>{"data"=>"valuetype", "object"=>"valuetype", "ref"=>"valuetype"}, "input"=> {"bottom"=>"align", "button"=>"type", "checkbox"=>"type", "checked"=>"checked", "disabled"=>"disabled", "file"=>"type", "hidden"=>"type", "image"=>"type", "ismap"=>"ismap", "left"=>"align", "ltr"=>"dir", "middle"=>"align", "password"=>"type", "radio"=>"type", "readonly"=>"readonly", "reset"=>"type", "right"=>"align", "rtl"=>"dir", "submit"=>"type", "text"=>"type", "top"=>"align"}, "h2"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "abbr"=>{"ltr"=>"dir", "rtl"=>"dir"}, "h3"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "strike"=>{"ltr"=>"dir", "rtl"=>"dir"}, "body"=>{"ltr"=>"dir", "rtl"=>"dir"}, "ins"=>{"ltr"=>"dir", "rtl"=>"dir"}, "button"=> {"button"=>"type", "disabled"=>"disabled", "ltr"=>"dir", "reset"=>"type", "rtl"=>"dir", "submit"=>"type"}, "h4"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "select"=> {"disabled"=>"disabled", "ltr"=>"dir", "multiple"=>"multiple", "rtl"=>"dir"}, "caption"=> {"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir", "top"=>"align"}, "colgroup"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", 
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "tr"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "del"=>{"ltr"=>"dir", "rtl"=>"dir"}, "h5"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "iframe"=> {"0"=>"frameborder", "1"=>"frameborder", "auto"=>"scrolling", "bottom"=>"align", "left"=>"align", "middle"=>"align", "no"=>"scrolling", "right"=>"align", "top"=>"align", "yes"=>"scrolling"}} # :startdoc: # The code above is auto-generated. Don't edit manually. end hpricot-0.8.6/lib/hpricot/elements.rb0000644000175000017500000003606711710073440017143 0ustar boutilboutilmodule Hpricot # Once you've matched a list of elements, you will often need to handle them as # a group. Or you may want to perform the same action on each of them. # Hpricot::Elements is an extension of Ruby's array class, with some methods # added for altering elements contained in the array. # # If you need to create an element array from regular elements: # # Hpricot::Elements[ele1, ele2, ele3] # # Assuming that ele1, ele2 and ele3 contain element objects (Hpricot::Elem, # Hpricot::Doc, etc.) # # == Continuing Searches # # Usually the Hpricot::Elements you're working on comes from a search you've # done. Well, you can continue searching the list by using the same at # and search methods you can use on plain elements. # # elements = doc.search("/div/p") # elements = elements.search("/a[@href='http://hoodwink.d/']") # elements = elements.at("img") # # == Altering Elements # # When you're altering elements in the list, your changes will be reflected in # the document you started searching from. # # doc = Hpricot("That's my spoon, Tyler.") # doc.at("b").swap("fork") # doc.to_html # #=> "That's my fork, Tyler." 
# # == Getting More Detailed # # If you can't find a method here that does what you need, you may need to # loop through the elements and find a method in Hpricot::Container::Trav # which can do what you need. # # For example, you may want to search for all the H3 header tags in a document # and grab all the tags underneath the header, but not inside the header. # A good method for this is next_sibling: # # doc.search("h3").each do |h3| # while ele = h3.next_sibling # ary << ele # stuff away all the elements under the h3 # end # end # # Most of the useful element methods are in the mixins Hpricot::Traverse # and Hpricot::Container::Trav. class Elements < Array # Searches this list for any elements (or children of these elements) matching # the CSS or XPath expression +expr+. Root is assumed to be the element scanned. # # See Hpricot::Container::Trav.search for more. def search(*expr,&blk) Elements[*map { |x| x.search(*expr,&blk) }.flatten.uniq] end alias_method :/, :search # Searches this list for the first element (or child of these elements) matching # the CSS or XPath expression +expr+. Root is assumed to be the element scanned. # # See Hpricot::Container::Trav.at for more. def at(expr, &blk) if expr.kind_of? Fixnum super else search(expr, &blk)[0] end end alias_method :%, :at # Convert this group of elements into a complete HTML fragment, returned as a # string. def to_html map { |x| x.output("") }.join end alias_method :to_s, :to_html # Returns an HTML fragment built of the contents of each element in this list. # # If a HTML +string+ is supplied, this method acts like inner_html=. def inner_html(*string) if string.empty? map { |x| x.inner_html }.join else x = self.inner_html = string.pop || x end end alias_method :html, :inner_html alias_method :innerHTML, :inner_html # Replaces the contents of each element in this list. Supply an HTML +string+, # which is loaded into Hpricot objects and inserted into every element in this # list. 
def inner_html=(string) each { |x| x.inner_html = string } end alias_method :html=, :inner_html= alias_method :innerHTML=, :inner_html= # Returns an string containing the text contents of each element in this list. # All HTML tags are removed. def inner_text map { |x| x.inner_text }.join end alias_method :text, :inner_text # Remove all elements in this list from the document which contains them. # # doc = Hpricot("Remove this: here") # doc.search("b").remove # doc.to_html # => "Remove this: " # def remove each { |x| x.parent.children.delete(x) } end # Empty the elements in this list, by removing their insides. # # doc = Hpricot("

We have so much to say.

") # doc.search("i").empty # doc.to_html # => "

We have to say.

" # def empty each { |x| x.inner_html = nil } end # Add to the end of the contents inside each element in this list. # Pass in an HTML +str+, which is turned into Hpricot elements. def append(str = nil, &blk) each { |x| x.html(x.children + x.make(str, &blk)) } end # Add to the start of the contents inside each element in this list. # Pass in an HTML +str+, which is turned into Hpricot elements. def prepend(str = nil, &blk) each { |x| x.html(x.make(str, &blk) + x.children) } end # Add some HTML just previous to each element in this list. # Pass in an HTML +str+, which is turned into Hpricot elements. def before(str = nil, &blk) each { |x| x.parent.insert_before x.make(str, &blk), x } end # Just after each element in this list, add some HTML. # Pass in an HTML +str+, which is turned into Hpricot elements. def after(str = nil, &blk) each { |x| x.parent.insert_after x.make(str, &blk), x } end # Wraps each element in the list inside the element created by HTML +str+. # If more than one element is found in the string, Hpricot locates the # deepest spot inside the first element. # # doc.search("a[@href]"). # wrap(%{}) # # This code wraps every link on the page inside a +div.link+ and a +div.link_inner+ nest. def wrap(str = nil, &blk) each do |x| wrap = x.make(str, &blk) nest = wrap.detect { |w| w.respond_to? :children } unless nest raise "No wrapping element found." end x.parent.replace_child(x, wrap) nest = nest.children.first until nest.empty? nest.html([x]) end end # Gets and sets attributes on all matched elements. # # Pass in a +key+ on its own and this method will return the string value # assigned to that attribute for the first elements. Or +nil+ if the # attribute isn't found. # # doc.search("a").attr("href") # #=> "http://hacketyhack.net/" # # Or, pass in a +key+ and +value+. This will set an attribute for all # matched elements. 
# # doc.search("p").attr("class", "basic") # # You may also use a Hash to set a series of attributes: # # (doc/"a").attr(:class => "basic", :href => "http://hackety.org/") # # Lastly, a block can be used to rewrite an attribute based on the element # it belongs to. The block will pass in an element. Return from the block # the new value of the attribute. # # records.attr("href") { |e| e['href'] + "#top" } # # This example adds a #top anchor to each link. # def attr key, value = nil, &blk if value or blk each do |el| el.set_attribute(key, value || blk[el]) end return self end if key.is_a? Hash key.each { |k,v| self.attr(k,v) } return self else return self[0].get_attribute(key) end end alias_method :set, :attr # Adds the class to all matched elements. # # (doc/"p").add_class("bacon") # # Now all paragraphs will have class="bacon". def add_class class_name each do |el| next unless el.respond_to? :get_attribute classes = el.get_attribute('class').to_s.split(" ") el.set_attribute('class', classes.push(class_name).uniq.join(" ")) end self end # Remove an attribute from each of the matched elements. # # (doc/"input").remove_attr("disabled") # def remove_attr name each do |el| next unless el.respond_to? :remove_attribute el.remove_attribute(name) end self end # Removes a class from all matched elements. # # (doc/"span").remove_class("lightgrey") # # Or, to remove all classes: # # (doc/"span").remove_class # def remove_class name = nil each do |el| next unless el.respond_to? :get_attribute if name classes = el.get_attribute('class').to_s.split(" ") el.set_attribute('class', (classes - [name]).uniq.join(" ")) else el.remove_attribute("class") end end self end ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^'"]*)'?"? *\]!i # " (for emacs) BRACK_RE = %r!(\[) *([^\]]*) *\]+!i FUNC_RE = %r!(:)?([a-zA-Z0-9\*_-]*)\( *[\"']?([^ \)]*?)['\"]? *\)! CUST_RE = %r!(:)([a-zA-Z0-9\*_-]*)()! CATCH_RE = %r!([:\.#]*)([a-zA-Z0-9\*_-]+)! 
def self.filter(nodes, expr, truth = true) until expr.empty? _, *m = *expr.match(/^(?:#{ATTR_RE}|#{BRACK_RE}|#{FUNC_RE}|#{CUST_RE}|#{CATCH_RE})/) break unless _ expr = $' m.compact! if m[0] == '@' m[0] = "@#{m.slice!(2,1).join}" end if m[0] == '[' && m[1] =~ /^\d+$/ m = [":", "nth", m[1].to_i-1] end if m[0] == ":" && m[1] == "not" nodes, = Elements.filter(nodes, m[2], false) elsif "#{m[0]}#{m[1]}" =~ /^(:even|:odd)$/ new_nodes = [] nodes.each_with_index {|n,i| new_nodes.push(n) if (i % 2 == (m[1] == "even" ? 0 : 1)) } nodes = new_nodes elsif "#{m[0]}#{m[1]}" =~ /^(:first|:last)$/ nodes = [nodes.send(m[1])] else meth = "filter[#{m[0]}#{m[1]}]" unless m[0].empty? if meth and Traverse.method_defined? meth args = m[2..-1] else meth = "filter[#{m[0]}]" if Traverse.method_defined? meth args = m[1..-1] end end args << -1 nodes = Elements[*nodes.find_all do |x| args[-1] += 1 x.send(meth, *args) ? truth : !truth end] end end [nodes, expr] end # Given two elements, attempt to gather an Elements array of everything between # (and including) those two elements. def self.expand(ele1, ele2, excl=false) ary = [] offset = excl ? -1 : 0 if ele1 and ele2 # let's quickly take care of siblings if ele1.parent == ele2.parent ary = ele1.parent.children[ele1.node_position..(ele2.node_position+offset)] else # find common parent p, ele1_p = ele1, [ele1] ele1_p.unshift p while p.respond_to?(:parent) and p = p.parent p, ele2_p = ele2, [ele2] ele2_p.unshift p while p.respond_to?(:parent) and p = p.parent common_parent = ele1_p.zip(ele2_p).select { |p1, p2| p1 == p2 }.flatten.last child = nil if ele1 == common_parent child = ele2 elsif ele2 == common_parent child = ele1 end if child ary = common_parent.children[0..(child.node_position+offset)] end end end return Elements[*ary] end def filter(expr) nodes, = Elements.filter(self, expr) nodes end def not(expr) if expr.is_a? 
Traverse nodes = self - [expr] else nodes, = Elements.filter(self, expr, false) end nodes end private def copy_node(node, l) l.instance_variables.each do |iv| node.instance_variable_set(iv, l.instance_variable_get(iv)) end end end module Traverse def self.filter(tok, &blk) define_method("filter[#{tok.is_a?(String) ? tok : tok.inspect}]", &blk) end filter '' do |name,i| name == '*' || (self.respond_to?(:name) && self.name.downcase == name.downcase) end filter '#' do |id,i| self.elem? and get_attribute('id').to_s == id end filter '.' do |name,i| self.elem? and classes.include? name end filter :lt do |num,i| self.position < num.to_i end filter :gt do |num,i| self.position > num.to_i end nth = proc { |num,i| self.position == num.to_i } nth_first = proc { |*a| self.position == 0 } nth_last = proc { |*a| self == parent.children_of_type(self.name).last } filter :nth, &nth filter :eq, &nth filter ":nth-of-type", &nth filter :first, &nth_first filter ":first-of-type", &nth_first filter :last, &nth_last filter ":last-of-type", &nth_last filter :even do |num,i| self.position % 2 == 0 end filter :odd do |num,i| self.position % 2 == 1 end filter ':first-child' do |i| self == parent.containers.first end filter ':nth-child' do |arg,i| case arg when 'even'; (parent.containers.index(self) + 1) % 2 == 0 when 'odd'; (parent.containers.index(self) + 1) % 2 == 1 else self == (parent.containers[arg.to_i - 1]) end end filter ":last-child" do |i| self == parent.containers.last end filter ":nth-last-child" do |arg,i| self == parent.containers[-1-arg.to_i] end filter ":nth-last-of-type" do |arg,i| self == parent.children_of_type(self.name)[-1-arg.to_i] end filter ":only-of-type" do |arg,i| parent.children_of_type(self.name).length == 1 end filter ":only-child" do |arg,i| parent.containers.length == 1 end filter :parent do |*a| containers.length > 0 end filter :empty do |*a| elem? && inner_html.length == 0 end filter :root do |*a| self.is_a? Hpricot::Doc end filter 'text' do |*a| self.text? 
end filter 'comment' do |*a| self.comment? end filter :contains do |arg, ignore| html.include? arg end pred_procs = {'text()' => proc { |ele, *_| ele.inner_text.strip }, '@' => proc { |ele, attr, *_| ele.get_attribute(attr).to_s if ele.elem? }} oper_procs = {'=' => proc { |a,b| a == b }, '!=' => proc { |a,b| a != b }, '~=' => proc { |a,b| a.split(/\s+/).include?(b) }, '|=' => proc { |a,b| a =~ /^#{Regexp::quote b}(-|$)/ }, '^=' => proc { |a,b| a.index(b) == 0 }, '$=' => proc { |a,b| a =~ /#{Regexp::quote b}$/ }, '*=' => proc { |a,b| idx = a.index(b) }} pred_procs.each do |pred_n, pred_f| oper_procs.each do |oper_n, oper_f| filter "#{pred_n}#{oper_n}" do |*a| qual = pred_f[self, *a] oper_f[qual, a[-2]] if qual end end end filter 'text()' do |val,i| self.children.grep(Hpricot::Text).detect { |x| x.content =~ /\S/ } if self.children end filter '@' do |attr,val,i| self.elem? and has_attribute? attr end filter '[' do |val,i| self.elem? and search(val).length > 0 end end end hpricot-0.8.6/lib/hpricot/builder.rb0000644000175000017500000001424211710073440016744 0ustar boutilboutilrequire 'hpricot/tags' require 'fast_xs' require 'hpricot/blankslate' require 'hpricot/htmlinfo' module Hpricot # XML unescape def self.uxs(str) str.to_s. gsub(/\&(\w+);/) { [NamedCharacters[$1] || 63].pack("U*") }. # 63 = ?? (query char) gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }. 
gsub(/\&\#x([0-9a-fA-F]+);/) { [$1.to_i(16)].pack("U*") } end def self.build(ele = Doc.new, assigns = {}, &blk) ele.extend Builder assigns.each do |k, v| ele.instance_variable_set("@#{k}", v) end ele.instance_eval(&blk) ele end module Builder @@default = { :indent => 0, :output_helpers => true, :output_xml_instruction => true, :output_meta_tag => true, :auto_validation => true, :tagset => Hpricot::XHTMLTransitional, :root_attributes => { :xmlns => 'http://www.w3.org/1999/xhtml', :'xml:lang' => 'en', :lang => 'en' } } def self.set(option, value) @@default[option] = value end def add_child ele ele.parent = self self.children ||= [] self.children << ele ele end # Write a +string+ to the HTML stream, making sure to escape it. def text!(string) add_child Text.new(string.fast_xs) end # Write a +string+ to the HTML stream without escaping it. def text(string) add_child Text.new(string) nil end alias_method :<<, :text alias_method :concat, :text # Create a tag named +tag+. Other than the first argument which is the tag name, # the arguments are the same as the tags implemented via method_missing. def tag!(tag, *args, &block) ele_id = nil if @auto_validation and @tagset if !@tagset.tagset.has_key?(tag) raise InvalidXhtmlError, "no element `#{tag}' for #{tagset.doctype}" elsif args.last.respond_to?(:to_hash) attrs = args.last.to_hash if @tagset.forms.include?(tag) and attrs[:id] attrs[:name] ||= attrs[:id] end attrs.each do |k, v| atname = k.to_s.downcase.intern unless k =~ /:/ or @tagset.tagset[tag].include? atname raise InvalidXhtmlError, "no attribute `#{k}' on #{tag} elements" end if atname == :id ele_id = v.to_s if @elements.has_key? ele_id raise InvalidXhtmlError, "id `#{ele_id}' already used (id's must be unique)." end end end end end # turn arguments into children or attributes childs = [] attrs = args.grep(Hash) childs.concat((args - attrs).flatten.map do |x| if x.respond_to? 
:to_html Hpricot.make(x.to_html) elsif x Text.new(x.fast_xs) end end.flatten) attrs = attrs.inject({}) do |hsh, ath| ath.each do |k, v| hsh[k] = v.to_s.fast_xs if v end hsh end # create the element itself tag = tag.to_s f = Elem.new(tag, attrs, childs, ETag.new(tag)) # build children from the block if block build(f, &block) end add_child f f end def build(*a, &b) Hpricot.build(*a, &b) end # Every HTML tag method goes through an html_tag call. So, calling div is equivalent # to calling html_tag(:div). All HTML tags in Hpricot's list are given generated wrappers # for this method. # # If the @auto_validation setting is on, this method will check for many common mistakes which # could lead to invalid XHTML. def html_tag(sym, *args, &block) if @auto_validation and @tagset.self_closing.include?(sym) and block raise InvalidXhtmlError, "the `#{sym}' element is self-closing, please remove the block" elsif args.empty? and block.nil? CssProxy.new(self, sym) else tag!(sym, *args, &block) end end XHTMLTransitional.tags.each do |k| class_eval %{ def #{k}(*args, &block) html_tag(#{k.inspect}, *args, &block) end } end def doctype(target, pub, sys) add_child DocType.new(target, pub, sys) end remove_method :head # Builds a head tag. Adds a meta tag inside with Content-Type # set to text/html; charset=utf-8. def head(*args, &block) tag!(:head, *args) do tag!(:meta, "http-equiv" => "Content-Type", "content" => "text/html; charset=utf-8") if @output_meta_tag instance_eval(&block) end end # Builds an html tag. An XML 1.0 instruction and an XHTML 1.0 Transitional doctype # are prepended. Also assumes :xmlns => "http://www.w3.org/1999/xhtml", # :lang => "en". def xhtml_transitional(attrs = {}, &block) # self.tagset = Hpricot::XHTMLTransitional xhtml_html(attrs, &block) end # Builds an html tag with XHTML 1.0 Strict doctype instead. def xhtml_strict(attrs = {}, &block) # self.tagset = Hpricot::XHTMLStrict xhtml_html(attrs, &block) end private def xhtml_html(attrs = {}, &block) instruct! 
if @output_xml_instruction doctype(:html, *@@default[:tagset].doctype) tag!(:html, @@default[:root_attributes].merge(attrs), &block) end end # Class used by Markaby::Builder to store element options. Methods called # against the CssProxy object are added as element classes or IDs. # # See the README for examples. class CssProxy < BlankSlate # Creates a CssProxy object. def initialize(builder, sym) @builder, @sym, @attrs = builder, sym, {} end # Adds attributes to an element. Bang methods set the :id attribute. # Other methods add to the :class attribute. def method_missing(id_or_class, *args, &block) if (idc = id_or_class.to_s) =~ /!$/ @attrs[:id] = $` else @attrs[:class] = @attrs[:class].nil? ? idc : "#{@attrs[:class]} #{idc}".strip end if block or args.any? args.push(@attrs) return @builder.tag!(@sym, *args, &block) end return self end end end hpricot-0.8.6/lib/hpricot/blankslate.rb0000644000175000017500000000350411710073440017435 0ustar boutilboutil#!/usr/bin/env ruby #-- # Copyright 2004 by Jim Weirich (jim@weirichhouse.org). # All rights reserved. # Permission is granted for use, copying, modification, distribution, # and distribution of modified versions of this work as long as the # above copyright notice is included. #++ module Hpricot # BlankSlate provides an abstract base class with no predefined # methods (except for \_\_send__ and \_\_id__). # BlankSlate is useful as a base class when writing classes that # depend upon method_missing (e.g. dynamic proxies). class BlankSlate class << self # Hide the method named +name+ in the BlankSlate class. Don't # hide +instance_eval+ or any method beginning with "__". def hide(name) undef_method name if instance_methods.include?(name.to_s) and name !~ /^(__|instance_eval)/ end end instance_methods.each { |m| hide(m) } end end # Since Ruby is very dynamic, methods added to the ancestors of # BlankSlate after BlankSlate is defined will show up in the # list of available BlankSlate methods. 
We handle this by defining a # hook in the Object and Kernel classes that will hide any defined module Kernel class << self alias_method :hpricot_slate_method_added, :method_added # Detect method additions to Kernel and remove them in the # BlankSlate class. def method_added(name) hpricot_slate_method_added(name) return if self != Kernel Hpricot::BlankSlate.hide(name) end end end class Object class << self alias_method :hpricot_slate_method_added, :method_added # Detect method additions to Object and remove them in the # BlankSlate class. def method_added(name) hpricot_slate_method_added(name) return if self != Object Hpricot::BlankSlate.hide(name) end end end hpricot-0.8.6/extras/0000755000175000017500000000000011710073440014056 5ustar boutilboutilhpricot-0.8.6/extras/hpricot.png0000644000175000017500000002663711710073440016252 0ustar boutilboutilPNG  IHDR'ΟgAMA7tEXtSoftwareAdobe ImageReadyqe<PLTE$ea]vcX.JV#0تfblp&4!X%TI wPCh %HY% [ ES*Ґlp.Rf/֋H Jϱ9[e O6˶ioN8ُ-ѫtwte5q/ΎNܶ}u%T(9ʯU0݆O{ ƍpr5{kjJ {"ThȁY8y|R2 H3Wh98LM ᑅnk?;! 
M+IDATx읋_iǹ(8)@ zA4oEjhyAV-_9`V澟yj7}QBd2yd9%s9K sAPeO`">dV oE\NOYP 2?b>wJ]n M&^Cn+A&-sxu~~M -YMJyBҮl6+nRtb-2 9Q:٨i9N̡r{ C  sAdUDZ18I̡D7cC Nw5%z>߳'BjDuW̡`uH _C0D$5qd rE[HFv8 56JjjOL rJ `8 S's;qj 8Ua?rm^Y-2bj F9|#:9)Db'}_q֕XI rP4.iPv_eZ(ri2gk`(*> (wzK]`yo{3 5v8H CA15zQ9HMbVp{nRwGݣra%|( Q V{Xu Bjn(tu0]^,@!+4 0pl2X&O >^Y^0 K̉ í[9P\{BV*c0E4Ne"K?R_!P98&򴵵 VI)e4RtL*$gva:eU<3 嗦Z n+.=gv]iR*JegApRc86Qv'sUBOgBzu ^V_Qf^9XEF&4!`$?]%)6zXpsFc^u3[}{&N'35:bjb v0P@ 漞S^]cΜ9VcPLkc-dqSY!*<{op]ꁮW5:$A/h9p?R3a90Eoh8 @mb@}@P Xbꁄ S92kmxhRS#HxvF ؝3<:AAmmG=MBቺҳ#D1\|eى$guK$#DiL|sLSS9 @ 'l= zpHgzE1'sA Z$l8 &+ɧDA\]]c lBG:'@X hrs O$ n 8s`3l rR xeIW7N{HA!m$|+7's;=bb2k8 @jFKr#`<#E(wQ\A2aHh MV 00/?gtT:9^6xpZd/>6'N@ښ^W,%p!!prbJ0349I {ve(i٥O#ohZi8,!Є꠾vEvii It#;jA33gr4{^[QHWniF5@oX0`-3 >xqztk [viji~rSm-5]Hxg" /qrNKr 7azp:b)ʬj07{߾RP]GE#XL s@3'p&ܟ6LR7u=py3.oV^1;k>,awn V=j$]$Ţq°uHϻ[ ѽnTF=-wV6ph*ylT ndR[2>=\"=b (G->ZX T.4:wJDVCSYG 87($ϡIyCd<-ZQ҂B /;˨uk;;;G.}:<<\RU41yxb"I>UW?\z;BVcmbK,8鎜57t.3{GpssZ,yQgTk4V+ZMg{˅۵0>0AA@BwP"Q%S))MpVO^M "%dd ktGqQbP$D榻YdOϸO Q1qN@4. 
Ѻ\>c_㋰XKJm ͛cXa$D#+L\kSrޝ)W( z'Qw>1XXk<lgk'C)vRꐞ:ف`}]c"7xCZ?IBBHJqFi5|~kğ0P"B >,P gLQ@ )tTԈWݍNWB3P'P<RyK8r~TG5h:_ 5E;X.-n«7oƛ)>B ^:tЄ8:RwRR%QޛCH(PC3uѷb!+K$ t dBP.4Q778HPԮ..}vVwjM DmK;bGKqSapk %iI;B.cI DFRX$Q ã( :@e,-r @Tù+ͻgkqT#hn&KOxu7CrZKQН.<hݒLM2fp[_?.-^zL}Pk K?:ļzB\USY˙X`JS 7'ǡ4no_uoϤb4(=k~V{4G.h( kx%c'3IX3ֆ:[Y~G!chtɊ}\F-\HeUXČb89a$G8+ThF5>[0I {IoЅ&EAϳg5]ٍG?7}^u?FxĬGG]Fsi4F#z)TjL'#ڑCƯ8.8O5+ծjx1fEf=vVS#qy 6MB(B^:y2`Hj4uI0.x32r uAcKQ^ dTy[BK8%cCV-=ET1UX[oXzaSDz aqk' 8s2鹿_6/V9$ 9= G[ʛ[]'۫GB(qk.+7utF5 U`.#5DZם O *C;m(xDz>z1/fBk h=nqR(7O-:qfkZ7bXR9/` 2 Vgw`*II]tnn%0h {ՉMfiTŮ.QyȵZ$BJ'bUUЯ[YE{=c4Ʊ!'UjT[U祣}9 3Ȃ톬.Ni.)a҆c>`"!3'R9 nG9Z=9^ZWS5$owr-8tW{wж8 |1|)d~6xLkS͛ˋ)pgio}6nWԥHšz6-dZC0FF xDz@dEH/FId[@К?|JAtyƳjGzаv¡\.f*$T 6DRj0 @a٘"GUHkJ@W+ޢZ.D_%r(íf4=5!x +VL]X!e?R'dP{t]O7?|rNvOx,J\;'JrRI"a5Rıv,>#PpTHj)txKbP x D`n2&6fDcH(|*<Cм21Ro#A+Xئ .g* |+ n8'JA54 dKz2@d IjwFkqUg1}^YPAO9E}ap%M[A.;1@v2zh ' ˶0?㇐vtsUéRxz}qU-@"p a`U^X ԂE(PB8̭Rpϒ>VϋMeqR!gp2k]'bFl7 {@EU Đ#"zر` 6HBz@mh)aN: vBZ gԃPCK*ypVqsgdr|`ݏ€ tG974."tYq_] . HAi[L/U 6[CCf3.U'49(;~&}3)MZ^WPVu8B8X8C fȰZ4RO%&K {UVk _c5Cp@7mX6صM&aa+` 4Q8()|̸U[ex2=^.0:X@,3:Eڌ 6`HQI[rvx:%v,UNƣep(5,PjzXyh=Mdr_0@ IBeR%qV @VLCXuyy)U30`% f'*ÄNZw:pRg'y~OwUg;k6#Y9 F>kY? *)kr#` ֺ1LXPa1؝Dd' `}>`~Cw٣Cb/<3͙ؓXiad\倹Ei`3%U2HӍfIWppvڝJg*k` >NFI〗O`` 9پLFkO҇)"RaMϊ}8>2@ &)dr0BϐN:PVQ+lQ9ؿVdKH<' F>i~?uF}?8  h9|&!א-|*fDS9I.zIK%p`pvmmK34ub)NX,P=| LWl~%S>~y8䰃(d,ŕA J+HIsTc<Ք (<ݽK ًM⠀RxX#Z?ߑ_ p{;/ BR|"!Y5 PJCee%AnHۺFA 2opYCB3b./\{FfC풑OTI?M4 0vh,ֈ TXe0*V2(X OQjkl?7<,,p{/LͭtL LCC"Gm OVqd o9p;G{t8גPJ*0\U]b*I(afǺm5ϊyO..N? 
qF죲P)OfK0hLZX$a̴ÑElN4)4VFMj 9ψ?^dvVC|Rxy$Vz)mHbL~/fiĞ٬lA4S @Dn7I q?ݷ[_ġFyv[UB9˿A(8D a:`o[Аܡ*x\ձ4xO sV=%],!0lh.hLƄ-X#p ^Hi.:{ qI b_lo)C䪓8&&ŅJ& Xˠipnm?m6&˜(*|}R$%A{fUJX]  HKR!nmVOeξWz%H:䅾ŰF㧐5 .V ^`OXI) N*^9 =šȡ_ǯh3fM%/.ª@G Tѓ27&f`7bד~PP~ADFFFZm&@eԀmKpb` ڊ7أN$Ћw[lK"`s;s<*$J޷,{6h%\ڝ_(mi$pSe8Txy:OE9TB%[>8/6IQ 6Q뻥1gC uad\d+P~ QC xN5ZM*`#8&I}t1FߜkBJ,ӹOlES_sf~q,&dr 4A[r̷L|ڰQ7f湍]{wP= U?H@==-oG<x nEcn61XGz7#X7{woPv>k `/ U0kIUid/M(CN j۬+#Fz2qX8L"`m+)y'Xf-×ǡ(h<#͋tⓥl4HO`3A--ׁ`ɬ] tB8Ǿ}1`Ug)s 쎼5$J8[2^Kk } ÷g5)*; sʁ88, IMl*7e!m9y3{ p_ i<{[FJ֧O{)w?z[W*Va c#z`gWp`97&qz<+sikb|_KW}˗/gɮB^9this is titlehttp://fake.com") assert_equal "this is title", (doc/:rss/:channel/:title).text assert_equal "http://fake.com", (doc/:rss/:channel/:link).text end # make sure XML doesn't get downcased def test_casing doc = Hpricot::XML(TestFiles::WHY) assert_equal "hourly", (doc.at "sy:updatePeriod").inner_html assert_equal 1, (doc/"guid[@isPermaLink]").length end # be sure tags named "text" are ok def test_text_tags doc = Hpricot::XML("City PoisonedRita Lee has poisoned Brazil.") assert_equal "City Poisoned", (doc/"title").text end end hpricot-0.8.6/test/test_preserved.rb0000644000175000017500000000564111710073440017120 0ustar boutilboutil# -*- coding: utf-8 -*- #!/usr/bin/env ruby require 'test/unit' require 'hpricot' require 'load_files' unless "".respond_to?(:lines) require 'enumerator' class String def lines Enumerable::Enumerator.new(self, :each_line) end end end class TestPreserved < Test::Unit::TestCase def assert_roundtrip str doc = Hpricot(str) yield doc if block_given? str2 = doc.to_original_html if RUBY_VERSION =~ /^1.9/ str2.force_encoding('UTF-8') end str.lines.zip(str2.lines).each do |s1, s2| assert_equal s1, s2 end end def assert_html str1, str2 doc = Hpricot(str2) yield doc if block_given? assert_equal str1, doc.to_original_html end def test_simple str = "

Hpricot is a you know uh fine thing.

" assert_html str, str assert_html "

Hpricot is a you know uh fine thing.

", str do |doc| (doc/:p).set('class', 'new') end end def test_parent str = "Test

Paragraph one.

Paragraph two.

" assert_html str, str assert_html "

Paragraph one.

Paragraph two.

", str do |doc| (doc/:head).remove (doc/:div).set('id', 'all') (doc/:p).wrap('
') end end def test_escaping_of_contents doc = Hpricot(TestFiles::BOINGBOING) assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip end def test_files assert_roundtrip TestFiles::BASIC assert_roundtrip TestFiles::BOINGBOING assert_roundtrip TestFiles::CY0 end def test_fixup_link doc = %{ht} assert_roundtrip doc assert_equal Hpricot(doc).to_s, %{ht} assert_equal Hpricot.XML(doc).to_s, %{ht} end def test_escaping_of_attrs # ampersands in URLs str = %{
Google} link = (doc = Hpricot(str)).at(:a) assert_equal "http://google.com/search?q=hpricot&l=en", link['href'] assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href'] assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href') assert_equal "http://google.com/search?q=hpricot&l=en", link.raw_attributes['href'] assert_equal str, doc.to_html # alter the url link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")" assert_equal %{Google}, doc.to_html end end hpricot-0.8.6/test/test_paths.rb0000644000175000017500000000144311710073440016234 0ustar boutilboutil#!/usr/bin/env ruby require 'test/unit' require 'hpricot' require 'load_files' class TestParser < Test::Unit::TestCase def test_roundtrip @basic = Hpricot.parse(TestFiles::BASIC) %w[link link[2] body #link1 a p.ohmy].each do |css_sel| ele = @basic.at(css_sel) assert_equal ele, @basic.at(ele.css_path) assert_equal ele, @basic.at(ele.xpath) end end def test_attr_brackets doc = Hpricot('') assert_equal 1, (doc/'input[@name^="vendor[porkpies]"]').length assert_equal 1, (doc/'input[@name="vendor[porkpies]"]').length assert_equal 0, (doc/'input[@name$="]]]]]"]').length doc = Hpricot('') assert_equal 1, (doc/'input[@name^="vendor[porkpies][meaty]"]').length end end hpricot-0.8.6/test/test_parser.rb0000644000175000017500000004205511710073440016415 0ustar boutilboutil# -*- coding: utf-8 -*- #!/usr/bin/env ruby require 'test/unit' require 'hpricot' require 'load_files' class TestParser < Test::Unit::TestCase def test_set_attr @basic = Hpricot.parse(TestFiles::BASIC) @basic.search('//p').set('class', 'para') assert_equal 4, @basic.search('//p').length assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length end # Test creating a new element def test_new_element elem = Hpricot::Elem.new('form') assert_not_nil(elem) assert_not_nil(elem.attributes) end def test_scan_text assert_equal 'FOO', Hpricot.make("FOO").children.first.content end def test_filter_by_attr 
@boingboing = Hpricot.parse(TestFiles::BOINGBOING) # this link is escaped in the doc link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware' assert_equal link, @boingboing.at("a[@href='#{link}']")['href'] end def test_filter_contains @basic = Hpricot.parse(TestFiles::BASIC) assert_equal 'Sample XHTML', @basic.search("title:contains('Sample')").to_s end def test_get_element_by_id @basic = Hpricot.parse(TestFiles::BASIC) assert_equal 'link1', @basic.get_element_by_id('link1')['id'] assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id') end def test_get_element_by_tag_name @basic = Hpricot.parse(TestFiles::BASIC) assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id') assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id') end def test_get_elements_by_tag_name_star simple = Hpricot.parse("

First

Second

") assert_equal 3, simple.get_elements_by_tag_name("*").size assert_equal 1, simple.get_elements_by_tag_name("div").size assert_equal 2, simple.get_elements_by_tag_name("p").size end def test_output_basic @basic = Hpricot.parse(TestFiles::BASIC) @basic2 = Hpricot.parse(@basic.inner_html) scan_basic @basic2 end def test_scan_basic @basic = Hpricot.parse(TestFiles::BASIC) scan_basic @basic end def scan_basic doc assert_kind_of Hpricot::XMLDecl, doc.children.first assert_not_equal doc.children.first.to_s, doc.children[1].to_s assert_equal 'link1', doc.at('#link1')['id'] assert_equal 'link1', doc.at("p a")['id'] assert_equal 'link1', (doc/:p/:a).first['id'] assert_equal 'link1', doc.search('p').at('a').get_attribute('id') assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id') assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0] assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0] assert_equal 4, (doc/'p').filter('*').length assert_equal 4, (doc/'p').filter('* *').length eles = (doc/'p').filter('.ohmy') assert_equal 1, eles.length assert_equal 'ohmy', eles.first.get_attribute('class') assert_equal 3, (doc/'p:not(.ohmy)').length assert_equal 3, (doc/'p').not('.ohmy').length assert_equal 3, (doc/'p').not(eles.first).length assert_equal 2, (doc/'p').filter('[@class]').length assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class') assert_equal 1, (doc/'p').filter('[@class~="final"]').length assert_equal 2, (doc/'p > a').length assert_equal 1, (doc/'p.ohmy > a').length assert_equal 2, (doc/'p / a').length assert_equal 2, (doc/'link ~ link').length assert_equal 3, (doc/'title ~ link').length assert_equal 5, (doc/"//p/text()").length assert_equal 6, (doc/"//p[a]//text()").length assert_equal 2, (doc/"//p/a/text()").length end def test_positional h = Hpricot( "

one

two

" ) assert_equal "

one

", h.search("//div/p:eq(0)").to_s assert_equal "

one

", h.search("//div/p:first").to_s assert_equal "

one

", h.search("//div/p:first()").to_s end def test_pace doc = Hpricot(TestFiles::PACE_APPLICATION) assert_equal 'get', doc.at('form[@name=frmSect11]')['method'] # assert_equal '2', doc.at('#hdnSpouse')['value'] end def test_scan_boingboing @boingboing = Hpricot.parse(TestFiles::BOINGBOING) assert_equal 60, (@boingboing/'p.posted').length assert_equal 1, @boingboing.search("//a[@name='027906']").length assert_equal 10, @boingboing.search("script comment()").length assert_equal 3, @boingboing.search("a[text()*='Boing']").length assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length assert_equal 0, @boingboing.search("h3[text()='College']").length assert_equal 60, @boingboing.search("h3").length assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length assert_equal 17, @boingboing.search("h3[text()$='s']").length assert_equal 116, @boingboing.search("p[text()]").length assert_equal 211, @boingboing.search("p").length end def test_reparent doc = Hpricot(%{
}) div1 = doc.search('#blurb_1') div1.before('
') div0 = doc.search('#blurb_0') div0.before('
') assert_equal 'div', doc.at('#blurb_1').name end def test_siblings @basic = Hpricot.parse(TestFiles::BASIC) t = @basic.at(:title) e = t.next_sibling assert_equal 'test1.css', e['href'] assert_equal 'title', e.previous_sibling.name end def test_css_negation @basic = Hpricot.parse(TestFiles::BASIC) assert_equal 3, (@basic/'p:not(.final)').length end def test_remove_attribute @basic = Hpricot.parse(TestFiles::BASIC) (@basic/:p).each { |ele| ele.remove_attribute('class') } assert_equal 0, (@basic/'p[@class]').length end def test_abs_xpath @boingboing = Hpricot.parse(TestFiles::BOINGBOING) assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length assert_equal 18, @boingboing.search("//script").length divs = @boingboing.search("//script/../div") assert_equal 2, divs.length imgs = @boingboing.search('//div/p/a/img') assert_equal 16, imgs.length assert_equal 16, @boingboing.search('//div').search('p/a/img').length assert imgs.all? { |x| x.name == 'img' } end def test_predicates @boingboing = Hpricot.parse(TestFiles::BOINGBOING) assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length p_imgs = @boingboing.search('//div/p[/a/img]') assert_equal 16, p_imgs.length assert p_imgs.all? { |x| x.name == 'p' } p_imgs = @boingboing.search('//div/p[a/img]') assert_equal 16, p_imgs.length assert p_imgs.all? { |x| x.name == 'p' } assert_equal 1, @boingboing.search('//input[@checked]').length end def test_tag_case @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE) assert_equal 2, @tenderlove.search('//a').length assert_equal 3, @tenderlove.search('//area').length assert_equal 2, @tenderlove.search('//meta').length end def test_alt_predicates @boingboing = Hpricot.parse(TestFiles::BOINGBOING) assert_equal 1, @boingboing.search('//table/tr:last').length @basic = Hpricot.parse(TestFiles::BASIC) assert_equal "

The third paragraph

", @basic.search('p:eq(2)').to_html assert_equal '

THE FINAL PARAGRAPH

', @basic.search('p:last').to_html assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class') end def test_insert_after # ticket #63 doc = Hpricot('
') (doc/'div').each do |element| element.after('

Paragraph 1

Paragraph 2

') end assert_equal doc.to_html, '

Paragraph 1

Paragraph 2

' end def test_insert_before # ticket #61 doc = Hpricot('
') (doc/'div').each do |element| element.before('

Paragraph 1

Paragraph 2

') end assert_equal doc.to_html, '

Paragraph 1

Paragraph 2

' end def test_many_paths @boingboing = Hpricot.parse(TestFiles::BOINGBOING) assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length assert_equal 18, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length end def test_stacked_search @boingboing = Hpricot.parse(TestFiles::BOINGBOING) assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img') end def test_attr_casing doc = Hpricot("A simple test string.") assert_equal (doc % :a)[:href], "a" assert_equal (doc % :a)[:HREF], nil assert_equal (doc % :a)['href'], "a" assert_equal (doc % :a)['HREF'], nil end def test_class_search # test case sent by Chih-Chao Lam doc = Hpricot("
abc
") assert_equal 1, doc.search(".xyz").length doc = Hpricot("
abc
xyz
") assert_equal 1, doc.search(".xyz").length assert_equal 4, doc.search("*").length end def test_kleene_star # bug noticed by raja bhatia doc = Hpricot("1
2
3
4") assert_equal 2, doc.search("*[@class*='small']").length assert_equal 2, doc.search("*.small").length assert_equal 2, doc.search(".small").length assert_equal 2, doc.search(".large").length end def test_empty_comment doc = Hpricot("

") assert doc.children[0].children[0].comment? doc = Hpricot("

") assert doc.children[0].children[0].comment? end def test_body_newlines @immob = Hpricot.parse(TestFiles::IMMOB) body = @immob.at(:body) {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10', 'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066', 'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v| assert_equal v, body[k] end end def test_nested_twins @doc = Hpricot("
Hi
there
") assert_equal 1, (@doc/"div div").length end def test_wildcard @basic = Hpricot.parse(TestFiles::BASIC) assert_equal 3, (@basic/"*[@id]").length assert_equal 3, (@basic/"//*[@id]").length end def test_javascripts @immob = Hpricot.parse(TestFiles::IMMOB) assert_equal 3, (@immob/:script)[0].inner_html.scan(/
}, %{
}, %{
}, %{
}]. each do |str| doc = Hpricot(str) assert_equal 1, (doc/:form).length assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action'] end end def test_procins doc = Hpricot("\n") assert_equal "php", doc.children[0].target assert_equal "blah='blah'", doc.children[2].content end def test_no_buffer_error Hpricot(%{

\n\n\n\n

}) end def test_youtube_attr str = <<-edoc Lorem ipsum. Jolly roger, ding-dong sing-a-long Check out my posting, I have bright mice in large clown cars. HAI edoc assert_equal "HAI", doc.at("body").inner_text end # http://github.com/hpricot/hpricot/issues#issue/28 def test_invalid_inner_text assert_equal "A", Hpricot('A&B;').inner_text[0...1] end # http://github.com/hpricot/hpricot/issues#issue/25 def test_encoding_compatibility_error Hpricot("

\xC3\x9Cber

").inner_text end # Reported by Jonathan Nichols on the Hpricot list (24 May 2007) def test_self_closed_form doc = Hpricot(<<-edoc)
edoc assert_equal "button", doc.at("//form/input")['type'] end def test_escaped_quote # Backslash '\' is not an escape character in HTML. doc = Hpricot("

test

") assert_equal "C:\\dir\\", doc.at("input")["value"] doc = Hpricot('

test

') assert_equal "C:\\dir\\", doc.at("input")["value"] end def test_filters @basic = Hpricot.parse(TestFiles::BASIC) assert_equal 0, (@basic/"title:parent").size assert_equal 3, (@basic/"p:parent").size assert_equal 3, (@basic/"link:empty").size assert_equal 1, (@basic/"span:empty").size end def test_keep_cdata str = %{} assert_equal str, Hpricot(str).to_html end def test_namespace chunk = <<-END hi END doc = Hpricot::XML(chunk) assert (doc/"//t:sam").size > 0 # at least this should probably work # assert (doc/"//sam").size > 0 # this would be nice end def test_uxs_ignores_non_entities assert_equal 'abc', Hpricot.uxs('abc') end def test_uxs_handles_gt_lt_amp_quot assert_equal '"&<>', Hpricot.uxs('"&<>') end def test_uxs_handles_numeric_values if String.method_defined? :encoding assert_equal "é", Hpricot.uxs('é') else assert_equal "\303\251", Hpricot.uxs('é') end end def test_uxs_handles_hexadecimal_values if String.method_defined? :encoding assert_equal "é", Hpricot.uxs('é') else assert_equal "\303\251", Hpricot.uxs('é') end end def test_uxs_handles_entities if String.method_defined? :encoding assert_equal "é", Hpricot.uxs('é') else assert_equal "\303\251", Hpricot.uxs('é') end end def test_cdata_inner_text xml = Hpricot.XML(%{ 96586 STDERR }) assert_equal "This is STDOUT", (xml/:peon/:stdout).inner_text assert_equal "This is STDERR", (xml/:peon/:stderr).inner_text end def test_parsing_html_with_noscript doc = Hpricot(<<-edoc)

Testing

edoc assert_equal "7ff5e90iormq5niy6x98j75", doc.at("/html/head/meta[@name='verification']")['content'] end def test_nil_attr # parsing this file was failing on JRuby assert_nothing_raised {Hpricot.parse(TestFiles::BNQT)} end def test_unknown_tag header = <<-edoc
blah
edoc doc = Hpricot(<<-edoc)
#{header}
edoc assert_equal header.chomp, (doc/"#htest").to_html end def test_nested_unknown_tags header = %(
) doc = Hpricot(%(
#{header}
)) assert_equal header.chomp, (doc/"#htest").to_html end end hpricot-0.8.6/test/test_builder.rb0000644000175000017500000000201411710073440016536 0ustar boutilboutil# -*- coding: utf-8 -*- #!/usr/bin/env ruby require 'test/unit' require 'hpricot' class TestBuilder < Test::Unit::TestCase def test_escaping_text doc = Hpricot() { b "" } assert_equal "<a"b>", doc.to_html assert_equal %{}, doc.at("text()").to_s end def test_no_escaping_text doc = Hpricot() { div.test.me! { text "" } } assert_equal %{
}, doc.to_html assert_equal %{}, doc.at("text()").to_s end def test_latin1_entities doc = Hpricot() { b "€•" } assert_equal "€•", doc.to_html assert_equal "€•", doc.at("text()").to_s end def test_escaping_attrs text = "Some text" assert_equal "Some text", Hpricot(text).to_html end def test_korean_utf8_entities a = '한글' doc = Hpricot() { b a } assert_equal "한글", doc.to_html end end hpricot-0.8.6/test/test_alter.rb0000644000175000017500000000665311710073440016234 0ustar boutilboutil# -*- coding: utf-8 -*- #!/usr/bin/env ruby require 'test/unit' require 'hpricot' require 'load_files' class TestAlter < Test::Unit::TestCase def setup @basic = Hpricot.parse(TestFiles::BASIC) end def test_before test0 = "" @basic.at("link").before(test0) assert_equal 'test0.css', @basic.at("link").attributes['href'] end def test_after test_inf = "" @basic.search("link")[-1].after(test_inf) assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href'] end def test_wrap ohmy = (@basic/"p.ohmy").wrap("
") assert_equal 'wrapper', ohmy[0].parent['id'] assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class'] end def test_add_class first_p = (@basic/"p:first").add_class("testing123") assert first_p[0].get_attribute("class").split(" ").include?("testing123") assert (Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123") assert !(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123") end def test_change_attributes all_ps = (@basic/"p").attr("title", "Some Title & Etc…") all_as = (@basic/"a").attr("href", "http://my_new_href.com") all_lb = (@basic/"link").attr("href") { |e| e.name } assert_changed(@basic, "p", all_ps) {|p| p.raw_attributes["title"] == "Some Title & Etc…"} assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"} assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" } end def test_change_attributes2 all_as = (@basic%"a").attributes["href"] = "http://my_new_href.com" all_ps = (@basic%"p").attributes["title"] = "Some Title & Etc…" assert_equal (@basic%"a").raw_attributes["href"], "http://my_new_href.com" assert_equal (@basic%"p").raw_attributes["title"], "Some Title & Etc…" assert_equal (@basic%"p").attributes["title"], "Some Title & Etc…" end def test_remove_attr all_rl = (@basic/"link").remove_attr("href") assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? } end def test_remove_class all_c1 = (@basic/"p[@class*='last']").remove_class("last") assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' } end def test_remove_all_classes all_c2 = (@basic/"p[@class]").remove_class assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? 
} end def test_xml_casing doc = Hpricot.XML("text") (doc/:root/:wildCat).after("gravity") assert_equal doc.to_s, "textgravity" frag = Hpricot.XML do b { i "A bit of HTML" } end (frag/:b).after("gravity") assert_equal frag.to_s, "A bit of HTMLgravity" end def test_reparent_empty_nodes doc = Hpricot("
") doc.root.inner_html = "foo" assert_equal doc.root.inner_html, "foo" doc.root.inner_html = "" assert_equal doc.root.inner_html, "" doc.root.swap { b "test" } assert_equal doc.root.inner_html, "test" end def assert_changed original, selector, set, &block assert set.all?(&block) assert Hpricot(original.to_html).search(selector).all?(&block) end end hpricot-0.8.6/test/nokogiri-bench.rb0000644000175000017500000000304611710073440016755 0ustar boutilboutil#!/usr/bin/env ruby require 'rubygems' require 'open-uri' require 'hpricot' require 'nokogiri' require 'benchmark' content = File.read("test/files/boingboing.html") N = 100 unless Gem.loaded_specs['hpricot'].version > Gem::Version.new('0.6.161') abort "** Use higher than Hpricot 0.6.161!" end puts "Hpricot #{Gem.loaded_specs['hpricot'].version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}" hdoc = Hpricot(content) ndoc = Nokogiri.Hpricot(content) Benchmark.bm do |x| x.report('hpricot:doc') do N.times do Hpricot(content) end end x.report('nokogiri:doc') do N.times do Nokogiri.Hpricot(content) end end end Benchmark.bm do |x| x.report('hpricot:xpath') do N.times do info = hdoc.search("//a[@name='027906']").first.inner_text url = hdoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text end end x.report('nokogiri:xpath') do N.times do info = ndoc.search("//a[@name='027906']").first.inner_text url = ndoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text end end end Benchmark.bm do |x| x.report('hpricot:css') do N.times do info = hdoc.search('form input[@checked]').first url = hdoc.search('td spacer').first.inner_text end end x.report('nokogiri:css') do N.times do info = ndoc.search('form input[@checked]').first url = ndoc.search('td spacer').first.inner_text end end end hpricot-0.8.6/test/load_files.rb0000644000175000017500000000032211710073440016152 0ustar boutilboutilmodule TestFiles Dir.chdir(File.dirname(__FILE__)) do 
Dir['files/*.{html,xhtml,xml}'].each do |fname| const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, IO.read(fname) end end end hpricot-0.8.6/test/files/0000755000175000017500000000000011710073440014631 5ustar boutilboutilhpricot-0.8.6/test/files/why.xml0000644000175000017500000000336211710073440016166 0ustar boutilboutil why the lucky stiff http://whytheluckystiff.net hex-editing reality to give us infinite grenades!! en-us 2007-01-16T22:39:04+00:00 hourly 1 2000-01-01T12:00+00:00 1.3http://whytheluckystiff.net/quatrains/1.3.htmlquatrains/1.3@http://whytheluckystiff.netquatrainsquatrainswhy the lucky stiff2007-01-14T08:47:05+00:00<blockquote> <p>That cadillac of yours and that driver of yours!<br />You and your teacups rattling away in the back seat!<br />You always took the mike, oh, and all those cowboys you shot!<br />I held your hand! And I&#8217;ll shoot a cowboy one day!</p> </blockquote> <blockquote> <p>You said, &#8220;Let&#8217;s run into the woods like kids!&#8221; <br />You said, &#8220;Let&#8217;s rub our hands together super-hot!&#8221; <br />And we scalded the trees and left octagons, I think that was you and<br />You threw parties on the roof!</p> </blockquote> hpricot-0.8.6/test/files/week9.html0000644000175000017500000033301511710073440016550 0ustar boutilboutil NFL Football Scores - CBS SportsLine.com
Scoreboard presented by
XXLXXL
XXL
PARTNER LINKS   NFL.com  PGATOUR.com  NCAAsports.com  
   Register  · Help 
|    powered by Google
CBS SportsLine.com NFL Football Sports News
 Home  NFL  NBA  MLB  NHL  Coll FB  Coll BK  PGA TOUR  Autos  Tennis  Horses  More  
  Fantasy   Mobile  Games  Contests   Shop  
· Home
· NFL
· NCAA
· MLB
· NBA
· NHL
· Fantasy
  NFL HomeScoreboardStandings | Schedules | Stats | Teams | Players | Transactions | Injuries | Fantasy News
 
Scoreboard

   
ALERT:
Preseason WeekHall of Fame · 1 · 2 · 3 · 4
Week1 · 2 · 3 · 4 · 5 · 6 · 7 · 8 · 9 · 10 · 11 · 12 · 13 · 14 · 15 · 16 · 17

 
Open Date: Arizona, Carolina, N.Y. Jets, Philadelphia All times are US/Eastern
 

 
Sunday, November 5, 2006
Ford Field
Atlanta (5-3-0)770014
Detroit  (2-6-0) «10731030
Game Leaders
ATL: M. Vick (17-32, 163), M. Vick (10-80)
DET: J. Kitna (20-32, 321), R. Williams (6-138)
Recap · GameCenter · Live Commentary (Glog)
 
Soldier Field
Miami  (2-6-0) «01471031
Chicago (7-1-0)373013
Game Leaders
MIA: J. Harrington (16-32, 137), R. Brown (29-157)
CHI: R. Grossman (18-42, 210), T. Jones (20-69)
Recap · GameCenter · Live Commentary (Glog)
 
Ralph Wilson Stadium
Green Bay (3-5-0)007310
Buffalo  (3-5-0) «3701424
Game Leaders
GB: B. Favre (28-47, 287), A. Green (23-122)
BUF: J. Losman (8-15, 102), A. Thomas (20-95)
Recap · GameCenter · Live Commentary (Glog)
 
M&T Bank Stadium
Cincinnati (4-4-0)0731020
Baltimore  (6-2-0) «1436326
Game Leaders
CIN: C. Palmer (12-26, 195), C. Henry (2-79)
BAL: S. McNair (21-31, 245), T. Heap (4-84)
Recap · GameCenter · Live Commentary (Glog)
 
FedEx Field
Dallas (4-4-0)0127019
Washington  (3-5-0) «5701022
Game Leaders
DAL: T. Romo (24-36, 284), P. Crayton (4-84)
WAS: M. Brunell (14-23, 192), C. Portis (23-84)
Recap · GameCenter · Live Commentary (Glog)
 
Raymond James Stadium
New Orleans  (6-2-0) «14314031
Tampa Bay (2-6-0)0140014
Game Leaders
NO: D. Brees (24-32, 314), M. Colston (11-123)
TB: B. Gradkowski (18-31, 185), J. Galloway (4-97)
Recap · GameCenter · Live Commentary (Glog)
 
Edward Jones Dome
Kansas City  (5-3-0) «7170731
St. Louis (4-4-0)0107017
Game Leaders
KC: D. Huard (10-15, 148), L. Johnson (27-172)
STL: M. Bulger (31-42, 354), S. Jackson (13-133)
Recap · GameCenter · Live Commentary (Glog)
 
Giants Stadium
Houston (2-6-0)037010
NY Giants  (6-2-0) «700714
Game Leaders
HOU: D. Carr (21-30, 176), A. Johnson (9-83)
NYG: E. Manning (17-28, 179), T. Barber (17-115)
Recap · GameCenter · Live Commentary (Glog)
 
ALLTEL Stadium
Tennessee (2-6-0)00077
Jacksonville  (5-3-0) «14617037
Game Leaders
TEN: V. Young (15-36, 163), B. Scaife (5-70)
JAC: D. Garrard (12-22, 177), F. Taylor (13-79)
Recap · GameCenter · Live Commentary (Glog)
 
Monster Park
Minnesota (4-4-0)30003
San Francisco  (3-5-0) «06039
Game Leaders
MIN: B. Johnson (21-31, 136), C. Taylor (26-96)
SF: A. Smith (13-21, 105), F. Gore (19-41)
Recap · GameCenter · Live Commentary (Glog)
 
Qualcomm Stadium
Cleveland (2-6-0)3901325
San Diego  (6-2-0) «3771532
Game Leaders
CLE: C. Frye (25-43, 236), K. Winslow (11-78)
SD: P. Rivers (19-28, 211), L. Tomlinson (18-172)
Recap · GameCenter · Live Commentary (Glog)
 
Heinz Field
Denver  (6-2-0) «14071031
Pittsburgh (2-6-0)0107320
Game Leaders
DEN: J. Plummer (16-27, 227), J. Walker (6-134)
PIT: B. Roethlisberger (38-54, 433), H. Ward (7-127)
Recap · GameCenter · Live Commentary (Glog)
 
Gillette Stadium
Indianapolis  (8-0-0) «7107327
New England (6-2-0)0143320
Game Leaders
IND: P. Manning (20-36, 326), M. Harrison (8-145)
NE: T. Brady (20-35, 201), L. Maroney (13-63)
Recap · GameCenter · Live Commentary (Glog)
 
 

Monday, November 6, 2006
Qwest Field
Oakland (2-6-0)00000
Seattle  (5-3-0) «1030316
Game Leaders
OAK: A. Walter (16-35, 166), R. Moss (6-76)
SEA: S. Wallace (18-30, 176), M. Morris (30-138)
Recap · GameCenter · Live Commentary (Glog)
 
 

Key:
possession  = Possession
«  = Game Winner
 = Red Zone
 = New Play
 

 
 
Fantasy Football at CBS SportsLine.com
Help · User Comments · Site Index · Privacy Policy · About Us · Terms of Service
CBS.com · CBSNews.com · TheShowBuzz.com · CBS Corporation · Advertise With Us
CBS Sports Store · The INSIDER · Entertainment Tonight
Copyright © 1995 - 2006 SportsLine.com, Inc. All rights reserved. SportsLine is a registered service mark of SportsLine.com, Inc.
CBS "eye device" is a registered trademark of CBS Broadcasting, Inc.
hpricot-0.8.6/test/files/utf8.html0000644000175000017500000013620311710073440016412 0ustar boutilboutil UTF-8 Sampler

UTF-8 SAMPLER

  ¥ · £ · € · $ · ¢ · ₡ · ₢ · ₣ · ₤ · ₥ · ₦ · ₧ · ₨ · ₩ · ₪ · ₫ · ₭ · ₮ · ₯

Frank da Cruz
The Kermit Project - Columbia University
New York City
fdc@columbia.edu

Last update: Wed Apr 12 16:54:07 2006


PEACE ] [ Poetry ] [ I Can Eat Glass ] [ The Quick Brown Fox ] [ HTML Features ] [ Credits, Tools, Commentary ]

UTF-8 is an ASCII-preserving encoding method for Unicode (ISO 10646), the Universal Character Set (UCS). The UCS encodes most of the world's writing systems in a single character set, allowing you to mix languages and scripts within a document without needing any tricks for switching character sets. This web page is encoded directly in UTF-8.

As shown HERE, Columbia University's Kermit 95 terminal emulation software can display UTF-8 plain text in Windows 95, 98, ME, NT, XP, or 2000 when using a monospace Unicode font like Andale Mono WT J or Everson Mono Terminal, or the lesser populated Courier New, Lucida Console, or Andale Mono. C-Kermit can handle it too, if you have a Unicode display. As many languages as are representable in your font can be seen on the screen at the same time.

This, however, is a Web page. Some Web browsers can handle UTF-8, some can't. And those that can might not have a sufficiently populated font to work with (some browsers might pick glyphs dynamically from multiple fonts; Netscape 6 seems to do this). CLICK HERE for a survey of Unicode fonts for Windows.

The subtitle above shows currency symbols of many lands. If they don't appear as blobs, we're off to a good start!


Poetry

From the Anglo-Saxon Rune Poem (Rune version):

ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ
ᛋᚳᛖᚪᛚ᛫ᚦᛖᚪᚻ᛫ᛗᚪᚾᚾᚪ᛫ᚷᛖᚻᚹᛦᛚᚳ᛫ᛗᛁᚳᛚᚢᚾ᛫ᚻᛦᛏ᛫ᛞᚫᛚᚪᚾ
ᚷᛁᚠ᛫ᚻᛖ᛫ᚹᛁᛚᛖ᛫ᚠᚩᚱ᛫ᛞᚱᛁᚻᛏᚾᛖ᛫ᛞᚩᛗᛖᛋ᛫ᚻᛚᛇᛏᚪᚾ᛬

From Laȝamon's Brut (The Chronicles of England, Middle English, West Midlands):

An preost wes on leoden, Laȝamon was ihoten
He wes Leovenaðes sone -- liðe him be Drihten.
He wonede at Ernleȝe at æðelen are chirechen,
Uppen Sevarne staþe, sel þar him þuhte,
Onfest Radestone, þer he bock radde.

(The third letter in the author's name is Yogh, missing from many fonts; CLICK HERE for another Middle English sample with some explanation of letters and encoding).

From the Tagelied of Wolfram von Eschenbach (Middle High German):

Sîne klâwen durh die wolken sint geslagen,
er stîget ûf mit grôzer kraft,
ich sih in grâwen tägelîch als er wil tagen,
den tac, der im geselleschaft
erwenden wil, dem werden man,
den ich mit sorgen în verliez.
ich bringe in hinnen, ob ich kan.
sîn vil manegiu tugent michz leisten hiez.

Some lines of Odysseus Elytis (Greek):

Τη γλώσσα μου έδωσαν ελληνική
το σπίτι φτωχικό στις αμμουδιές του Ομήρου.
Μονάχη έγνοια η γλώσσα μου στις αμμουδιές του Ομήρου.

από το Άξιον Εστί
του Οδυσσέα Ελύτη

The first stanza of Pushkin's Bronze Horseman (Russian):

На берегу пустынных волн
Стоял он, дум великих полн,
И вдаль глядел. Пред ним широко
Река неслася; бедный чёлн
По ней стремился одиноко.
По мшистым, топким берегам
Чернели избы здесь и там,
Приют убогого чухонца;
И лес, неведомый лучам
В тумане спрятанного солнца,
Кругом шумел.

Šota Rustaveli's Veṗxis Ṭq̇aosani, ̣︡Th, The Knight in the Tiger's Skin (Georgian):

ვეპხის ტყაოსანი შოთა რუსთაველი

ღმერთსი შემვედრე, ნუთუ კვლა დამხსნას სოფლისა შრომასა, ცეცხლს, წყალსა და მიწასა, ჰაერთა თანა მრომასა; მომცნეს ფრთენი და აღვფრინდე, მივჰხვდე მას ჩემსა ნდომასა, დღისით და ღამით ვჰხედვიდე მზისა ელვათა კრთომაასა.

Tamil poetry of Cupiramaniya Paarathiyar, சுப்ரமணிய பாரதியார் (1882-1921):

யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம்,
பாமரராய் விலங்குகளாய், உலகனைத்தும் இகழ்ச்சிசொலப் பான்மை கெட்டு,
நாமமது தமிழரெனக் கொண்டு இங்கு வாழ்ந்திடுதல் நன்றோ? சொல்லீர்!

I Can Eat Glass

And from the sublime to the ridiculous, here is a certain phrase¹ in an assortment of languages:

  1. Sanskrit: काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥
  2. Sanskrit (standard transcription): kācaṃ śaknomyattum; nopahinasti mām.
  3. Classical Greek: ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει.
  4. Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.
    Etruscan: (NEEDED)
  5. Latin: Vitrum edere possum; mihi non nocet.
  6. Old French: Je puis mangier del voirre. Ne me nuit.
  7. French: Je peux manger du verre, ça ne me fait pas de mal.
  8. Provençal / Occitan: Pòdi manjar de veire, me nafrariá pas.
  9. Québécois: J'peux manger d'la vitre, ça m'fa pas mal.
  10. Walloon: Dji pou magnî do vêre, çoula m' freut nén må.
    Champenois: (NEEDED)
    Lorrain: (NEEDED)
  11. Picard: Ch'peux mingi du verre, cha m'foé mie n'ma.
    Corsican: (NEEDED)
    Jèrriais: (NEEDED)
  12. Kreyòl Ayisyen: Mwen kap manje vè, li pa blese'm.
  13. Basque: Kristala jan dezaket, ez dit minik ematen.
  14. Catalan / Català: Puc menjar vidre, que no em fa mal.
  15. Spanish: Puedo comer vidrio, no me hace daño.
  16. Aragones: Puedo minchar beire, no me'n fa mal .
  17. Galician: Eu podo xantar cristais e non cortarme.
  18. Portuguese: Posso comer vidro, não me faz mal.
  19. Brazilian Portuguese (7): Posso comer vidro, não me machuca.
  20. Caboverdiano: M' podê cumê vidru, ca ta maguâ-m'.
  21. Papiamentu: Ami por kome glas anto e no ta hasimi daño.
  22. Italian: Posso mangiare il vetro e non mi fa male.
  23. Milanese: Sôn bôn de magnà el véder, el me fa minga mal.
  24. Roman: Me posso magna' er vetro, e nun me fa male.
  25. Napoletano: M' pozz magna' o'vetr, e nun m' fa mal.
  26. Sicilian: Puotsu mangiari u vitru, nun mi fa mali.
  27. Venetian: Mi posso magnare el vetro, no'l me fa mae.
  28. Zeneise (Genovese): Pòsso mangiâ o veddro e o no me fà mâ.
    Rheto-Romance / Romansch: (NEEDED)
    Romany / Tsigane: (NEEDED)
  29. Romanian: Pot să mănânc sticlă și ea nu mă rănește.
  30. Esperanto: Mi povas manĝi vitron, ĝi ne damaĝas min.
    Pictish: (NEEDED)
    Breton: (NEEDED)
  31. Cornish: Mý a yl dybry gwéder hag éf ny wra ow ankenya.
  32. Welsh: Dw i'n gallu bwyta gwydr, 'dyw e ddim yn gwneud dolur i mi.
  33. Manx Gaelic: Foddym gee glonney agh cha jean eh gortaghey mee.
  34. Old Irish (Ogham): ᚛᚛ᚉᚑᚅᚔᚉᚉᚔᚋ ᚔᚈᚔ ᚍᚂᚐᚅᚑ ᚅᚔᚋᚌᚓᚅᚐ᚜
  35. Old Irish (Latin): Con·iccim ithi nglano. Ním·géna.
  36. Irish: Is féidir liom gloinne a ithe. Ní dhéanann sí dochar ar bith dom.
  37. Scottish Gaelic: S urrainn dhomh gloinne ithe; cha ghoirtich i mi.
  38. Anglo-Saxon (Runes): ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬
  39. Anglo-Saxon (Latin): Ic mæg glæs eotan ond hit ne hearmiað me.
  40. Middle English: Ich canne glas eten and hit hirtiþ me nouȝt.
  41. English: I can eat glass and it doesn't hurt me.
  42. English (IPA): [aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː] (Received Pronunciation)
  43. English (Braille): ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑
  44. Lalland Scots / Doric: Ah can eat gless, it disnae hurt us.
    Glaswegian: (NEEDED)
  45. Gothic (4): 𐌼𐌰𐌲 𐌲𐌻𐌴𐍃 𐌹̈𐍄𐌰𐌽, 𐌽𐌹 𐌼𐌹𐍃 𐍅𐌿 𐌽𐌳𐌰𐌽 𐌱𐍂𐌹𐌲𐌲𐌹𐌸.
  46. Old Norse (Runes): ᛖᚴ ᚷᛖᛏ ᛖᛏᛁ ᚧ ᚷᛚᛖᚱ ᛘᚾ ᚦᛖᛋᛋ ᚨᚧ ᚡᛖ ᚱᚧᚨ ᛋᚨᚱ
  47. Old Norse (Latin): Ek get etið gler án þess að verða sár.
  48. Norsk / Norwegian (Nynorsk): Eg kan eta glas utan å skada meg.
  49. Norsk / Norwegian (Bokmål): Jeg kan spise glass uten å skade meg.
    Føroyskt / Faroese: (NEEDED)
  50. Íslenska / Icelandic: Ég get etið gler án þess að meiða mig.
  51. Svenska / Swedish: Jag kan äta glas utan att skada mig.
  52. Dansk / Danish: Jeg kan spise glas, det gør ikke ondt på mig.
  53. Soenderjysk: Æ ka æe glass uhen at det go mæ naue.
  54. Frysk / Frisian: Ik kin glês ite, it docht me net sear.
  55. Nederlands / Dutch: Ik kan glas eten, het doet mij geen kwaad.
  56. Kirchröadsj/Bôchesserplat: Iech ken glaas èèse, mer 't deet miech jing pieng.
  57. Afrikaans: Ek kan glas eet, maar dit doen my nie skade nie.
  58. Lëtzebuergescht / Luxemburgish: Ech kan Glas iessen, daat deet mir nët wei.
  59. Deutsch / German: Ich kann Glas essen, ohne mir weh zu tun.
  60. Ruhrdeutsch: Ich kann Glas verkasematuckeln, ohne dattet mich wat jucken tut.
  61. Langenfelder Platt: Isch kann Jlaas kimmeln, uuhne datt mich datt weh dääd.
  62. Lausitzer Mundart ("Lusatian"): Ich koann Gloos assn und doas dudd merr ni wii.
  63. Odenwälderisch: Iech konn glaasch voschbachteln ohne dass es mir ebbs daun doun dud.
  64. Sächsisch / Saxon: 'sch kann Glos essn, ohne dass'sch mer wehtue.
  65. Pfälzisch: Isch konn Glass fresse ohne dasses mer ebbes ausmache dud.
  66. Schwäbisch / Swabian: I kå Glas frässa, ond des macht mr nix!
  67. Bayrisch / Bavarian: I koh Glos esa, und es duard ma ned wei.
  68. Allemannisch: I kaun Gloos essen, es tuat ma ned weh.
  69. Schwyzerdütsch: Ich chan Glaas ässe, das tuet mir nöd weeh.
  70. Hungarian: Meg tudom enni az üveget, nem lesz tőle bajom.
  71. Suomi / Finnish: Voin syödä lasia, se ei vahingoita minua.
  72. Sami (Northern): Sáhtán borrat lása, dat ii leat bávččas.
  73. Erzian: Мон ярсан суликадо, ды зыян эйстэнзэ а ули.
    Karelian: (NEEDED)
    Vepsian: (NEEDED)
    Votian: (NEEDED)
    Livonian: (NEEDED)
  74. Estonian: Ma võin klaasi süüa, see ei tee mulle midagi.
  75. Latvian: Es varu ēst stiklu, tas man nekaitē.
  76. Lithuanian: Aš galiu valgyti stiklą ir jis manęs nežeidžia
    Old Prussian: (NEEDED)
    Sorbian (Wendish): (NEEDED)
  77. Czech: Mohu jíst sklo, neublíží mi.
  78. Slovak: Môžem jesť sklo. Nezraní ma.
  79. Polska / Polish: Mogę jeść szkło i mi nie szkodzi.
  80. Slovenian: Lahko jem steklo, ne da bi mi škodovalo.
  81. Croatian: Ja mogu jesti staklo i ne boli me.
  82. Serbian (Latin): Mogu jesti staklo a da mi ne škodi.
  83. Serbian (Cyrillic): Могу јести стакло а да ми не шкоди.
  84. Macedonian: Можам да јадам стакло, а не ме штета.
  85. Russian: Я могу есть стекло, оно мне не вредит.
  86. Belarusian (Cyrillic): Я магу есці шкло, яно мне не шкодзіць.
  87. Belarusian (Lacinka): Ja mahu jeści škło, jano mne ne škodzić.
  88. Ukrainian: Я можу їсти шкло, й воно мені не пошкодить.
  89. Bulgarian: Мога да ям стъкло, то не ми вреди.
  90. Georgian: მინას ვჭამ და არა მტკივა.
  91. Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։
  92. Albanian: Unë mund të ha qelq dhe nuk më gjen gjë.
  93. Turkish: Cam yiyebilirim, bana zararı dokunmaz.
  94. Turkish (Ottoman): جام ييه بلورم بڭا ضررى طوقونمز
  95. Bangla / Bengali: আমি কাঁচ খেতে পারি, তাতে আমার কোনো ক্ষতি হয় না।
  96. Marathi: मी काच खाऊ शकतो, मला ते दुखत नाही.
  97. Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती.
  98. Tamil: நான் கண்ணாடி சாப்பிடுவேன், அதனால் எனக்கு ஒரு கேடும் வராது.
  99. Urdu(2): میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی ۔
  100. Pashto(2): زه شيشه خوړلې شم، هغه ما نه خوږوي
  101. Farsi / Persian: .من می توانم بدونِ احساس درد شيشه بخورم
  102. Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني.
    Aramaic: (NEEDED)
  103. Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי.
  104. Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ.
    Judeo-Arabic: (NEEDED)
    Ladino: (NEEDED)
    Gǝʼǝz: (NEEDED)
    Amharic: (NEEDED)
  105. Twi: Metumi awe tumpan, ɜnyɜ me hwee.
  106. Hausa (Latin): Inā iya taunar gilāshi kuma in gamā lāfiyā.
  107. Hausa (Ajami) (2): إِنا إِىَ تَونَر غِلَاشِ كُمَ إِن غَمَا لَافِىَا
  108. Yoruba(3): Mo lè je̩ dígí, kò ní pa mí lára.
  109. (Ki)Swahili: Naweza kula bilauri na sikunyui.
  110. Malay: Saya boleh makan kaca dan ia tidak mencederakan saya.
  111. Tagalog: Kaya kong kumain nang bubog at hindi ako masaktan.
  112. Chamorro: Siña yo' chumocho krestat, ti ha na'lalamen yo'.
  113. Javanese: Aku isa mangan beling tanpa lara.
  114. Burmese: က္ယ္ဝန္‌တော္‌၊က္ယ္ဝန္‌မ မ္ယက္‌စားနုိင္‌သည္‌။ ၎က္ရောင္‌့ ထိခုိက္‌မ္ဟု မရ္ဟိပာ။ (7)
  115. Vietnamese (quốc ngữ): Tôi có thể ăn thủy tinh mà không hại gì.
  116. Vietnamese (nôm) (4): 些 𣎏 世 咹 水 晶 𦓡 空 𣎏 害 咦
    Khmer: (NEEDED)
    Lao: (NEEDED)
  117. Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ
  118. Mongolian (Cyrillic): Би шил идэй чадна, надад хортой биш
  119. Mongolian (Classic) (5): ᠪᠢ ᠰᠢᠯᠢ ᠢᠳᠡᠶᠦ ᠴᠢᠳᠠᠨᠠ ᠂ ᠨᠠᠳᠤᠷ ᠬᠣᠤᠷᠠᠳᠠᠢ ᠪᠢᠰᠢ
    Dzongkha: (NEEDED)
    Nepali: (NEEDED)
  120. Tibetan: ཤེལ་སྒོ་ཟ་ནས་ང་ན་གི་མ་རེད།
  121. Chinese: 我能吞下玻璃而不伤身体。
  122. Chinese (Traditional): 我能吞下玻璃而不傷身體。
  123. Taiwanese(6): Góa ē-tàng chia̍h po-lê, mā bē tio̍h-siong.
  124. Japanese: 私はガラスを食べられます。それは私を傷つけません。
  125. Korean: 나는 유리를 먹을 수 있어요. 그래도 아프지 않아요
  126. Bislama: Mi save kakae glas, hemi no save katem mi.
  127. Hawaiian: Hiki iaʻu ke ʻai i ke aniani; ʻaʻole nō lā au e ʻeha.
  128. Marquesan: E koʻana e kai i te karahi, mea ʻā, ʻaʻe hauhau.
  129. Chinook Jargon: Naika məkmək kakshət labutay, pi weyk ukuk munk-sik nay.
  130. Navajo: Tsésǫʼ yishą́ągo bííníshghah dóó doo shił neezgai da.
    Cherokee (and Cree, Ojibwa, Inuktitut, and other Native American languages): (NEEDED)
    Garifuna: (NEEDED)
    Gullah: (NEEDED)
  131. Lojban: mi kakne le nu citka le blaci .iku'i le se go'i na xrani mi
  132. Nórdicg: Ljœr ye caudran créneþ ý jor cẃran.

(Additions, corrections, completions, gratefully accepted.)

For testing purposes, some of these are repeated in a monospace font . . .

  1. Euro Symbol: €.
  2. Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.
  3. Íslenska / Icelandic: Ég get etið gler án þess að meiða mig.
  4. Polish: Mogę jeść szkło, i mi nie szkodzi.
  5. Romanian: Pot să mănânc sticlă și ea nu mă rănește.
  6. Ukrainian: Я можу їсти шкло, й воно мені не пошкодить.
  7. Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։
  8. Georgian: მინას ვჭამ და არა მტკივა.
  9. Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती.
  10. Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי.
  11. Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ.
  12. Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني.
  13. Japanese: 私はガラスを食べられます。それは私を傷つけません。
  14. Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ

Notes:

  1. The "I can eat glass" phrase and initial translations (about 30 of them) were borrowed from Ethan Mollick's I Can Eat Glass page (which disappeared on or about June 2004) and converted to UTF-8. Since Ethan's original page is gone, I should mention that his purpose was to offer travelers a phrase they could use in any country that would command a certain kind of respect, or at least get attention. See Credits for the many additional contributions since then. When submitting new entries, the word "hurt" (if you have a choice) is used in the sense of "cause harm", "do damage", or "bother", rather than "inflict pain" or "make sad". In this vein Otto Stolz comments (as do others further down; personally I think it's better for the purpose of this page to have extra entries and/or to show a greater repertoire of characters than it is to enforce a strict interpretation of the word "hurt"!):

    This is the meaning I have translated to the Swabian dialect. However, I just have noticed that most of the German variants translate the "inflict pain" meaning. The German example should rather read:

    "Ich kann Glas essen ohne mir zu schaden."

    (The comma fell victim to the 1996 orthographic reform, cf. http://www.ids-mannheim.de/reform/e3-1.html#P76.)

    You may wish to contact the contributors of the following translations to correct them:

    • Lëtzebuergescht / Luxemburgish: Ech kan Glas iessen, daat deet mir nët wei.
    • Lausitzer Mundart ("Lusatian"): Ich koann Gloos assn und doas dudd merr ni wii.
    • Sächsisch / Saxon: 'sch kann Glos essn, ohne dass'sch mer wehtue.
    • Bayrisch / Bavarian: I koh Glos esa, und es duard ma ned wei.
    • Allemannisch: I kaun Gloos essen, es tuat ma ned weh.
    • Schwyzerdütsch: Ich chan Glaas ässe, das tuet mir nöd weeh.

    In contrast, I deem the following translations *alright*:

    • Ruhrdeutsch: Ich kann Glas verkasematuckeln, ohne dattet mich wat jucken tut.
    • Pfälzisch: Isch konn Glass fresse ohne dasses mer ebbes ausmache dud.
    • Schwäbisch / Swabian: I kå Glas frässa, ond des macht mr nix!

    (However, you could remove the commas, on account of http://www.ids-mannheim.de/reform/e3-1.html#P76 and http://www.ids-mannheim.de/reform/e3-1.html#P72, respectively.)

    I guess, also these examples translate the wrong sense of "hurt", though I do not know these languages well enough to assert them definitely:

    • Nederlands / Dutch: Ik kan glas eten; het doet mij geen pijn. (This one has been changed)
    • Kirchröadsj/Bôchesserplat: Iech ken glaas èèse, mer 't deet miech jing pieng.

    In the Romanic languages, the variations on "fa male" (it) are probably wrong, whilst the variations on "hace daño" (es) and "damaĝas" (Esperanto) are probably correct; "nocet" (la) is definitely right.

    The northern Germanic variants of "skada" are probably right, as are the Slavic variants of "škodi/шкоди" (se); however the Slavic variants of "boli" (hv) are probably wrong, as "bolena" means "pain/ache", IIRC.

    The numbering of the samples is arbitrary, done only to keep track of how many there are, and can change any time a new entry is added. The arrangement is also arbitrary but with some attempt to group related examples together. Note: All languages not listed are wanted, not just the ones that say (NEEDED).

  2. Correct right-to-left display of these languages depends on the capabilities of your browser. The period should appear on the left. In the monospace Yiddish example, the Yiddish digraphs should occupy one character cell.
  3. Yoruba: The third word is Latin letter small 'j' followed by small 'e' with U+0329, Combining Vertical Line Below. This displays correctly only if your Unicode font includes the U+0329 glyph and your browser supports combining diacritical marks. The Indic examples also include combining sequences.
  4. Includes Unicode 3.1 (or later) characters beyond Plane 0.
  5. The Classic Mongolian example should be vertical, top-to-bottom and left-to-right. But such display is almost impossible. Also no font yet exists which provides the proper ligatures and positional variants for the characters of this script, which works somewhat like Arabic.
  6. Taiwanese is also known as Holo or Hoklo, and is related to Southern Min dialects such as Amoy. Contributed by Henry H. Tan-Tenn, who comments, "The above is the romanized version, in a script current among Taiwanese Christians since the mid-19th century. It was invented by British missionaries and saw use in hundreds of published works, mostly of a religious nature. Most Taiwanese did not know Chinese characters then, or at least not well enough to read. More to the point, though, a written standard using Chinese characters has never developed, so a significant minority of words are represented with different candidate characters, depending on one's personal preference or etymological theory. In this sentence, for example, "-tàng", "chia̍h", "mā" and "bē" are problematic using Chinese characters. "Góa" (I/me) and "po-lê" (glass) are as written in other Sinitic languages (e.g. Mandarin, Hakka)."
  7. Wagner Amaral of Pinese & Amaral Associados notes that the Brazilian Portuguese sentence for "I can eat glass" should be identical to the Portuguese one, as the word "machuca" means "inflict pain", or rather "injuries". The words "faz mal" would more correctly translate as "cause harm".
  8. Burmese: In English the first person pronoun "I" stands for both genders, male and female. In Burmese (except in the central part of Burma) kyundaw (က္ယ္ဝန္‌တော္‌) for male and kyanma (က္ယ္ဝန္‌မ) for female. Using here a fully-compliant Unicode Burmese font -- sadly one and only Padauk Graphite font exists -- rendering using graphite engine. CLICK HERE to test Burmese characters.

The Quick Brown Fox

The "I can eat glass" sentences do not necessarily show off the orthography of each language to best advantage. In many alphabetic written languages it is possible to include all (or most) letters (or "special" characters) in a single (often nonsense) pangram. These were traditionally used in typewriter instruction; now they are useful for stress-testing computer fonts and keyboard input methods. Here are a few examples (SEND MORE):

  1. English: The quick brown fox jumps over the lazy dog.
  2. Irish: "An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena ṗóg éada ó ṡlí do leasa ṫú?" "D'ḟuascail Íosa Úrṁac na hÓiġe Beannaiṫe pór Éava agus Áḋaiṁ."
  3. Dutch: Pa's wijze lynx bezag vroom het fikse aquaduct.
  4. German: Falsches Üben von Xylophonmusik quält jeden größeren Zwerg. (1)
  5. German: Im finſteren Jagdſchloß am offenen Felsquellwaſſer patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem verſifften kniffligen C-Xylophon. (2)
  6. Swedish: Flygande bäckasiner söka strax hwila på mjuka tuvor.
  7. Icelandic: Sævör grét áðan því úlpan var ónýt.
  8. Polish: Pchnąć w tę łódź jeża lub ośm skrzyń fig.
  9. Czech: Příliš žluťoučký kůň úpěl ďábelské kódy.
  10. Slovak: Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote.
  11. Russian: В чащах юга жил-был цитрус? Да, но фальшивый экземпляр! ёъ.
  12. Bulgarian: Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон.
  13. Sami (Northern): Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža.
  14. Hungarian: Árvíztűrő tükörfúrógép.
  15. Spanish: El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.
  16. Portuguese: O próximo vôo à noite sobre o Atlântico, põe freqüentemente o único médico. (3)
  17. French: Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus et de voir leurs drôles d'œufs abîmés.
  18. Esperanto: Eĥoŝanĝo ĉiuĵaŭde.
  19. Hebrew: זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן.
  20. Japanese (Hiragana):
    いろはにほへど ちりぬるを
    わがよたれぞ つねならむ
    うゐのおくやま けふこえて
    あさきゆめみじ ゑひもせず (4)

Notes:

  1. Other phrases commonly used in Germany include: "Ein wackerer Bayer vertilgt ja bequem zwo Pfund Kalbshaxe" and, more recently, "Franz jagt im komplett verwahrlosten Taxi quer durch Bayern", but both lack umlauts and esszet. Previously, going for the shortest sentence that has all the umlauts and special characters, I had "Grüße aus Bärenhöfe (und Óechtringen)!" Acute accents are not used in native German words, so I was surprised to discover "Óechtringen" in the Deutsche Bundespost Postleitzahlenbuch:

    Click for full-size image (2.8MB)

    It's a small village in eastern Lower Saxony. The "oe" in this case turns out to be the Lower Saxon "lengthening e" (Dehnungs-e), which makes the previous vowel long (used in a number of Lower Saxon place names such as Soest and Itzehoe), not the "e" that indicates umlaut of the preceding vowel. Many thanks to the Óechtringen-Namenschreibungsuntersuchungskomitee (Alex Bochannek, Manfred Erren, Asmus Freytag, Christoph Päper, plus Werner Lemberg who serves as Óechtringen-Namenschreibungsuntersuchungskomiteerechtschreibungsprüfer) for their relentless pursuit of the facts in this case. Conclusion: the accent almost certainly does not belong on this (or any other native German) word, but neither can it be dismissed as dirt on the page. To add to the mystery, it has been reported that other copies of the same edition of the PLZB do not show the accent! UPDATE (March 2006): David Krings was intrigued enough by this report to contact the mayor of Ebstorf, of which Oechtringen is a borough, who responded:

    Sehr geehrter Mr. Krings,
    wenn Oechtringen irgendwo mit einem Akzent auf dem O geschrieben wurde, dann kann das nur ein Fehldruck sein. Die offizielle Schreibweise lautet jedenfalls „Oechtringen“.
    Mit freundlichen Grüssen
    Der Samtgemeindebürgermeister
    i.A. Lothar Jessel

  2. From Karl Pentzlin (Kochel am See, Bavaria, Germany): "This German phrase is suited for display by a Fraktur (broken letter) font. It contains: all common three-letter ligatures: ffi ffl fft and all two-letter ligatures required by the Duden for Fraktur typesetting: ch ck ff fi fl ft ll ſch ſi ſſ ſt tz (all in a manner such they are not part of a three-letter ligature), one example of f-l where German typesetting rules prohibit ligating (marked by a ZWNJ), and all German letters a...z, ä,ö,ü,ß, ſ [long s] (all in a manner such that they are not part of a two-letter Fraktur ligature)." Otto Stolz notes that "'Schloß' is now spelled 'Schloss', in contrast to 'größer' (example 4) which has kept its 'ß'. Fraktur has been banned from general use, in 1942, and long-s (ſ) has ceased to be used with Antiqua (Roman) even earlier (the latest Antiqua-ſ I have seen is from 1913, but then I am no expert, so there may well be a later instance." Later Otto confirms the latter theory, "Now I've run across a book “Deutsche Rechtschreibung” (edited by Lutz Mackensen) from 1954 (my reprint is from 1956) that has kept the Antiqua-ſ in its dictionary part (but neither in the preface nor in the appendix)."

  3. Diaeresis is not used in Iberian Portuguese.

  4. From Yurio Miyazawa: "This poetry contains all the sounds in the Japanese language and used to be the first thing for children to learn in their Japanese class. The Hiragana version is particularly neat because it covers every character in the phonetic Hiragana character set." Yurio also sent the Kanji version:

    色は匂へど 散りぬるを
    我が世誰ぞ 常ならむ
    有為の奥山 今日越えて
    浅き夢見じ 酔ひもせず

Accented Cyrillic:

(This section contributed by Vladimir Marinov.)

In Bulgarian it is desirable, customary, or in some cases required to write accents over vowels. Unfortunately, no computer character sets contain the full repertoire of accented Cyrillic letters. With Unicode, however, it is possible to combine any Cyrillic letter with any combining accent. The appearance of the result depends on the font and the rendering engine. Here are two examples.

  1. Той видя бялата коса́ по главата и́ и ко́са на рамото и́, и ре́че да и́ рече́: "Пара́та по́ па́ри от па́рата, не ща пари́!", но си поми́сли: "Хей, помисли́ си! А́ и́ река, а́ е скочила в тази река, която щеше да тече́, а не те́че."

  2. По пъ́тя пъту́ват кю́рди и югославя́ни.

HTML Features

Here is the Russian alphabet (uppercase only) coded in three different ways, which should look identical:

  1. АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ   (Literal UTF-8)
  2. АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ   (Decimal numeric character reference)
  3. АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ   (Hexadecimal numeric character reference)

In another test, we use HTML language tags to distinguish Bulgarian, Russian, and Serbian, which have different italic forms for lowercase б, г, д, п, and/or т:

Bulgarian:   [ бгдпт ]   бгдпт ]   Мога да ям стъкло и не ме боли.
Russian: [ бгдпт ]   бгдпт ]   Я могу есть стекло, это мне не вредит.
Serbian: [ бгдпт ]   бгдпт ]   Могу јести стакло а да ми не шкоди.


Credits, Tools, and Commentary

Credits:
The "I can eat glass" phrase and the initial collection of translations: Ethan Mollick. Transcription / conversion to UTF-8: Frank da Cruz. Albanian: Sindi Keesan. Afrikaans: Johan Fourie, Kevin Poalses. Anglo Saxon: Frank da Cruz. Arabic: Najib Tounsi. Armenian: Vaçe Kundakçı. Belarusian: Alexey Chernyak. Bengali: Somnath Purkayastha, Deepayan Sarkar. Bislama: Dan McGarry. Braille: Frank da Cruz. Bulgarian: Sindi Keesan, Guentcho Skordev, Vladimir Marinov. Burmese: "cetanapa". Cabo Verde Creole: Cláudio Alexandre Duarte. Catalán: Jordi Bancells. Chinese: Jack Soo, Wong Pui Lam. Chinook Jargon: David Robertson. Cornish: Chris Stephens. Croatian: Marjan Baće. Czech: Stanislav Pecha, Radovan Garabík. Dutch: Peter Gotink. Pim Blokland, Rob Daniel, Rob de Wit. Erzian: Jack Rueter. Esperanto: Franko Luin, Radovan Garabík. Estonian: Meelis Roos. Farsi/Persian: Payam Elahi. Finnish: Sampsa Toivanen. French: Luc Carissimo, Anne Colin du Terrail, Sean M. Burke. Galician: Laura Probaos. Georgian: Giorgi Lebanidze. German: Christoph Päper, Otto Stolz, Karl Pentzlin, David Krings, Frank da Cruz. Gothic: Aurélien Coudurier. Greek: Ariel Glenn, Constantine Stathopoulos, Siva Nataraja. Hebrew: Jonathan Rosenne, Tal Barnea. Hausa: Malami Buba, Tom Gewecke. Hawaiian: na Hauʻoli Motta, Anela de Rego, Kaliko Trapp. Hindi: Shirish Kalele. Hungarian: András Rácz, Mark Holczhammer. Icelandic: Andrés Magnússon, Sveinn Baldursson. International Phonetic Alphabet (IPA): Siva Nataraja / Vincent Ramos. Irish: Michael Everson, Marion Gunn, James Kass, Curtis Clark. Italian: Thomas De Bellis. Japanese: Makoto Takahashi, Yurio Miyazawa. Kirchröadsj: Roger Stoffers. Kreyòl: Sean M. Burke. Korean: Jungshik Shin. Langenfelder Platt: David Krings. Lëtzebuergescht: Stefaan Eeckels. Lithuanian: Gediminas Grigas. Lojban: Edward Cherlin. Lusatian: Ronald Schaffhirt. Macedonian: Sindi Keesan. Malay: Zarina Mustapha. Manx: Éanna Ó Brádaigh. Marathi: Shirish Kalele. Marquesan: Kaliko Trapp. 
Middle English: Frank da Cruz. Milanese: Marco Cimarosti. Mongolian: Tom Gewecke. Napoletano: Diego Quintano. Navajo: Tom Gewecke. Nórdicg: Yẃlyan Rott. Norwegian: Herman Ranes. Odenwälderisch: Alexander Heß. Old Irish: Michael Everson. Old Norse: Andrés Magnússon. Papiamentu: Bianca and Denise Zanardi. Pashto: N.R. Liwal. Pfälzisch: Dr. Johannes Sander. Picard: Philippe Mennecier. Polish: Juliusz Chroboczek, Paweł Przeradowski. Portuguese: "Cláudio" Alexandre Duarte, Bianca and Denise Zanardi, Pedro Palhoto Matos, Wagner Amaral. Québécois: Laurent Detillieux. Roman: Pierpaolo Bernardi. Romanian: Juliusz Chroboczek, Ionel Mugurel. Ruhrdeutsch: "Timwi". Russian: Alexey Chernyak, Serge Nesterovitch. Sami: Anne Colin du Terrail, Luc Carissimo. Sanskrit: Siva Nataraja / Vincent Ramos. Sächsisch: André Müller. Schwäbisch: Otto Stolz. Scots: Jonathan Riddell. Serbian: Sindi Keesan, Ranko Narancic, Boris Daljevic, Szilvia Csorba. Slovak: G. Adam Stanislav, Radovan Garabík. Slovenian: Albert Kolar. Spanish: Aleida Muñoz, Laura Probaos. Swahili: Ronald Schaffhirt. Swedish: Christian Rose, Bengt Larsson. Taiwanese: Henry H. Tan-Tenn. Tagalog: Jim Soliven. Tamil: Vasee Vaseeharan. Tibetan: D. Germano, Tom Gewecke. Thai: Alan Wood's wife. Turkish: Vaçe Kundakçı, Tom Gewecke, Merlign Olnon. Ukrainian: Michael Zajac. Urdu: Mustafa Ali. Vietnamese: Dixon Au, [James] Đỗ Bá Phước 杜 伯 福. Walloon: Pablo Saratxaga. Welsh: Geiriadur Prifysgol Cymru (Andrew). Yiddish: Mark David, Zeneise: Angelo Pavese.

Tools Used to Create This Web Page:
The UTF8-aware Kermit 95 terminal emulator on Windows, to a Unix host with the EMACS text editor. Kermit 95 displays UTF-8 and also allows keyboard entry of arbitrary Unicode BMP characters as 4 hex digits, as shown HERE. Hex codes for Unicode values can be found in The Unicode Standard (recommended) and the online code charts. When submissions arrive by email encoded in some other character set (Latin-1, Latin-2, KOI, various PC code pages, JEUC, etc), I use the TRANSLATE command of C-Kermit on the Unix host (where I read my mail) to convert the character set to UTF-8 (I could also use Kermit 95 for this; it has the same TRANSLATE command). That's it -- no "Web authoring" tools, no locales, no "smart" anything. It's just plain text, nothing more. By the way, there's nothing special about EMACS -- any text editor will do, providing it allows entry of arbitrary 8-bit bytes as text, including the 0x80-0x9F "C1" range. EMACS 21.1 actually supports UTF-8; earlier versions don't know about it and display the octal codes; either way is OK for this purpose.

Commentary:
Date: Wed, 27 Feb 2002 13:21:59 +0100
From: "Bruno DEDOMINICIS" <b.dedominicis@cite-sciences.fr>
Subject: Je peux manger du verre, cela ne me fait pas mal.

I just found out your website and it makes me feel like proposing an interpretation of the choice of this peculiar phrase.

Glass is transparent and can hurt as everyone knows. The relation between people and civilisations is sometimes effusional and more often rude. The concept of breaking frontiers through globalization, in a way, is also an attempt to deny any difference. Isn't "transparency" the flag of modernity? Nothing should be hidden any more, authority is obsolete, and the new powers are supposed to reign through loving and smiling and no more through coercion...

Eating glass without pain sounds like a very nice metaphor of this attempt. That is, frontiers should become glass transparent first, and be denied by incorporating them. On the reverse, it shows that through globalization, frontiers undergo a process of displacement, that is, when they are not any more speakable, they become repressed from the speech and are therefore incorporated and might become painful symptoms, as for example what happens when one tries to eat glass.

The frontiers that used to separate bodies one from another tend to divide bodies from within and make them suffer.... The chosen phrase then appears as a denial of the symptom that might result from the destitution of traditional frontiers.

Best,
Bruno De Dominicis, Paris, France

Other Unicode pages onsite:

Unicode samplers offsite:

Unicode fonts:

[ Kermit 95 ] [ K95 Screen Shots ] [ C-Kermit ] [ Kermit Home ] [ Display Problems? ] [ The Unicode Consortium ]


UTF-8 Sampler / The Kermit Project / Columbia University / kermit@columbia.edu
hpricot-0.8.6/test/files/uswebgen.html0000644000175000017500000004121111710073440017335 0ustar boutilboutil Free Genealogy and Family History Online - The USGenWeb Project
The USGenWeb Project, Free Genealogy Online
Home About Us States Projects Researchers Volunteers

 

Keeping Internet Genealogy Free

Welcome to The USGenWeb Project! We are a group of volunteers working together to provide free genealogy websites for genealogical research in every county and every state of the United States. This Project is non-commercial and fully committed to free genealogy access for everyone.

Organization is by county and state, and this website provides you with links to all the state genealogy websites which, in turn, provide gateways to the counties. The USGenWeb Project also sponsors important Special Projects at the national level and this website provides an entry point to all of those pages, as well.

Clicking on a State Link (on the left) will take you to the State's website. Clicking on the tabs above will take you to additional information and links.

All of the volunteers who make up The USGenWeb Project are very proud of this endeavor and hope that you will find their hard work both beneficial and rewarding. Thank you for visiting!

The USGenWeb Project Team

10th Anniversary

2006 marks the 10th Anniversary of the USGenWeb Project and I have been looking back over those past 10 years. When the USGenWeb Project began, it was one of the few (if not the only) centralized places on the internet to find genealogy information and post a query. Those early state and county sites began with links to the small amount of on-line information of interest to a family historian and a query page. The only Special Project was the Archives. How far the Project has come during the past 10 years! Now there are several special projects and the states, counties and special projects sites of the Project not only contain links; they are filled with information and transcribed records, and more is being added every day by our wonderful, dedicated and hard working volunteers.

Ten years ago the internet, as we know it today, was in its infancy. The things we take for granted today--e-mail, PCs, cell phones, digital cameras, etc., were not in the average person's world. Family historians and professional genealogists not only didn't use the internet, most had never heard of it.

Over the past 10 years the internet has gone from obscurity to commonplace. As the internet became an everyday tool for millions of people, it changed the way family historians do research. The availability of on-line, easily accessible genealogy and historical information has fueled the phenomenal growth of Genealogy as a hobby and, I'm proud to say, the Project has been right there every step of the way.

Everywhere we look we see genealogy reported as the fastest growing hobby in the country. Now the internet is the first stop for beginning family historians and is used extensively by experienced researchers. New "How To" genealogy books devote chapters to using the internet, and it is a rare book that does not recommend The USGenWeb Project as one of the first places to visit.

While subscription sites have popped up everywhere on the web, The Project has continued to offer free access to its vast wealth of information. The USGenWeb Project is recognized as the premier site of free information, and the Project's websites welcome well over a million visitors each day.

The Project is where it is today because of the thousands of volunteers, both past and present, who cared enough to devote, collectively, millions of hours to gathering, transcribing and uploading information.

To each and every volunteer, past and present, a heartfelt Thank You, because you are the ones who have made The Project the fabulous resource it is today.

Linda Haas Davenport
National Coordinator
The USGenWeb Project



The USGenWeb Project, Free Genealogy Online


Comments and administrative-type problems should be emailed to the National Coordinator. For complaints regarding a specific web site within the USGenWeb Project, please include the URL when emailing the National Coordinator.

Direct comments or suggestions about this web site to the Webmaster.


Visit Rootsweb

Home
About Us
Projects
for Researchers
for Volunteers
Site Map

hpricot-0.8.6/test/files/tenderlove.html0000644000175000017500000000103311710073440017663 0ustar boutilboutil My Site! Your Site! hpricot-0.8.6/test/files/pace_application.html0000644000175000017500000012325511710073440021022 0ustar boutilboutil
First Health Services Corporation logo Pennsylvania Department of Aging logo
PACE/PACENET Enrollment Need Assistance?
Call: Inside PA 1-800-225-7223 Outside PA 717-651-3600
Email: PACECares@fhsc.com

Check if same as Applicant
Preparer's Name Preparer's Phone () -

Please enter the Preparer's Name & Phone #.
When you are done, click on 'Continue'.




Immobiliensuche - hier sind sie richtig!

Wir haben 30 Immobilien für Sie gefunden.

Listenansicht   Galerieansicht
Seiten: 1 2 >
nach: 

Erklärung: N Neu ! Aktuell


  Zimmer
Stadt/Kreis
Wohnfläche
Stadtteil/Ort
Kaltmiete
Straße
Foto
! Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 3,00
Erlangen
111,00 m²
Erlangen - Zentrum
999,45 EUR

Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 3,00
Erlangen
110,19 m²
Erlangen - Zentrum
991,71 EUR

Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 2,00
Erlangen
104,51 m²
Erlangen - Zentrum
940,59 EUR

N Ideal für Rollstuhlfahrer die Wohnung und die Lage Balkon Thumbnail
 2,00
Erlangen
103,42 m²
Erlangen - Zentrum
930,78 EUR

Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 3,00
Erlangen
103,00 m²
Erlangen - Zentrum
926,82 EUR

120 m 4ZiDesignerGaleriewohnung mit Kamin, Eckbadewanne, Fubodenheizung, Einbaukche 920 1.8.06 Einbauküche Balkon Thumbnail
 4,00
Erlangen
125,00 m²
Erlangen Süd
920,00 EUR
Wohnanlage Nahe Erlangen in Möhrendorf
120 m 4ZiDesignerGaleriewohnung mit Kamin, Eckbadewanne, Fubodenheizung, Einbaukche 920 1.8.06 Einbauküche Balkon Thumbnail
 4,00
Erlangen
125,00 m²
Erlangen Süd
890,00 EUR
Wohnanlage Nahe Erlangen in Möhrendorf
Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 2,00
Erlangen
94,95 m²
Erlangen - Zentrum
854,55 EUR

Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 2,00
Erlangen
92,00 m²
Erlangen - Zentrum
827,64 EUR

N Ideal für Rollstuhlfahrer die Wohnung und die Lage Balkon Thumbnail
 3,00
Erlangen
83,00 m²
Erlangen Süd
825,00 EUR
Nahe Unistr
Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 2,00
Erlangen
87,16 m²
Erlangen - Zentrum
784,44 EUR

Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 2,00
Erlangen
79,46 m²
Erlangen - Zentrum
715,14 EUR

Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 2,00
Erlangen
74,02 m²
Erlangen - Zentrum
666,18 EUR

N Ideal für Rollstuhlfahrer die Wohnung und die Lage Balkon Thumbnail
 2,00
Erlangen
74,00 m²
Erlangen - Zentrum
665,28 EUR

Schön Wohnen Nahe der Universitätsstr. mit kurzen Wegen zu allem Balkon Thumbnail
 2,00
Erlangen
74,00 m²
Erlangen - Zentrum
665,28 EUR


Seiten: 1 2 >
nach:


powered by ImmobilienScout24
hpricot-0.8.6/test/files/cy0.html0000644000175000017500000217706011710073440016227 0ustar boutilboutil #41: 1cy0.html on Hpricot

Ticket #41: 1cy0.html

File 1cy0.html, 63.9 kB (added by alexgutteridge, 1 month ago)

html

Line 
1
2
3
4
5 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
6 <html lang="en">
7         <head>
8 <link rel="alternate" type="application/rss+xml" title="RCSB PDB - Latest Released Structures" href="/pdb/rss/LastLoad">       
9                 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
10                 <title>RCSB PDB : Structure Explorer</title>
11                 <link rel="shortcut icon" href="/pdbstatic/images/pdb.ico" type="image/x-icon">
12                 <link rel="stylesheet" href="/pdb/skins/pastel/styles/pdbcomp.css" type="text/css">
13                  
14                 <!-- please read /pdbstatic/common/lib/comp.js.README if you need to modify some javascript for the site -->
15                 <script type="text/javascript" language="JavaScript" src="/pdbstatic/common/lib/comp.js"></script>
16                 <script type="text/javascript" language="JavaScript" src="/pdbstatic/common/lib/expandable.js"></script>
17                 <script type="text/javascript" language="JavaScript" src="/pdbstatic/common/lib/results.js"></script>
18                 <script type="text/javascript" language="JavaScript" src="/pdbstatic/common/lib/pdbajax.js"></script>
19                 <script type="text/javascript" language="JavaScript" src="/pdbstatic/common/lib/htmlhttprequest_commented.js"></script>
20                  
21                  
22                
23
24
25
26
27
28
29
30
31
32 <script type="text/javascript" language="JavaScript">
33
34 var resultSorts = new Array();
35
36 function openWindow(url){
37       var w = window.open ("", "win", "height=600,width=800,location,menubar,resizable,scrollbars,status,toolbar");
38                 w.location.href = url;
39                 w.focus();
40 }
41 function openWindowToSize(url,width,height){
42       var w = window.open ("", "win", "height="+height+",width="+width+",left=80,top=100,location,menubar,resizable,scrollbars,status,toolbar");
43                 w.location.href = url;
44                 w.focus();
45 }
46
47 // due to the poor way that the treeview code generates the fullLink, we need to have
48 // our openWindowFromNav method use numbers to reference a site if the href has any quotes
49 var roboloc = '/robohelp/';
50 navsites = [
51                         "http://deposit.rcsb.org",
52                         "http://sg.pdb.org",
53                         "http://targetdb.pdb.org",
54                         "http://function.rcsb.org:8080/pdb/function_distribution/index.html",
55                         "http://mmcif.pdb.org",
56                         "http://pdbml.rcsb.org",
57                         "http://sw-tools.pdb.org",
58                         "http://deposit.pdb.org/cc_dict_tut.html",
59                         "http://pepcdb.pdb.org/"
60                        
61                     ];
62 helpsites = [
63             "data_deposition_and_processing/how_to_deposit.htm",
64             "validation_server/how_to_validate.htm",
65             "data_download/how_to_download.htm",
66             "search_database/how_to_search.htm",
67             "browse_database/how_to_browse.htm",
68             "molecular_viewers/introduction_to_molecular_viewers.htm",
69             "files_formats/structures/chemical_component_format/chemical_component_introduction.htm",
70             "data_download/structure_download.htm", // 7
71             "data_download/ftp_services.htm", // 8
72             "data_download/theoretical_models.htm", //9
73             "data_download/cd_rom_dvd.htm", //10
74             "structuralgenomics/sg_home.htm",
75             "structuralgenomics/targetdb.htm",
76             "structuralgenomics/pepcdb.htm",
77             "structuralgenomics/function_distributions.htm", // 14
78             "files_formats/intro_dictionaries_file_formats.htm", //15
79             "software/tools/deposition_mmcif_tools.htm", //16
80             "data_deposition_and_processing/how_to_deposit.htm", //17
81             "data_deposition_and_processing/deposition_tools_at_a_glance.htm", //18
82             "site_navigation/citing_the_pdb.htm" //19
83                     ];
84 navalerts = [
85                         "Discussion Forums are not yet active.  Please check back later."
86                     ];
87 function openWindowFromNav(urltype, urlnumber)
88 {
89         if (urltype == 0) // new windows
90         {
91 //              window.open (navsites[urlnumber], "win", "height=600,width=800,location,menubar,resizable,scrollbars,status,toolbar");
92 openWindow(navsites[urlnumber]);
93         }
94         else if (urltype == 1) // robo window
95         {
96                 callHelp(roboloc, helpsites[urlnumber]);
97         }
98         else if (urltype == 2) // javascript alerts
99         {
100                 alert(navalerts[urlnumber]);
101         }
102 }
103
104
105 // You can find instructions for this file at http://www.treeview.net
106
107 //Environment variables are usually set at the top of this file.
108 var ICONPATH = '/pdb/skins/pastel/images/tree/';
109 USETEXTLINKS = 1;
110 STARTALLOPEN = 0;
111 USEFRAMES = 0;
112 USEICONS = 1;
113 WRAPTEXT = 0;
114 PRESERVESTATE = 1;
115 HIGHLIGHT=1;
116 ALLOWTOPLEVELRETRACTION = 1;
117
118
119
120
121 var pdbPathNames = new Object(); // associative array of links to xIDs of tree
122
123 function myInsDoc(nn,folder,opt,link,iconsrc)
124 {
125   //if (opt == 'S')
126   {
127     if (link.substring(0,5) == '/pdb/')
128     {
129       pdbPathNames[link] = nn;
130     }
131   }
132   var myInsDocSub = insDoc(folder,gLnk(opt, nn, link.replace(/'/g, "\\'")));
133   myInsDocSub.xID = nn;
134   if (iconsrc)
135   {
136     myInsDocSub.iconSrc = iconsrc;
137   }
138   else
139   {
140     myInsDocSub.iconSrc = ICONPATH + 'ftvsquare.gif';
141   }
142   return myInsDocSub;
143 }
144
145 function myInsFolder(nn,folder,link)
146 {
147   if (link.substring(0,5) == '/pdb/')
148   {
149     pdbPathNames[link] = nn;
150   }
151   var myInsFldSub = insFld(folder,gFld(nn, link.replace(/'/g, "\\'")));
152   myInsFldSub.xID = nn.replace(/amp;/g, '');
153   return myInsFldSub;
154 }
155
156 function checkLocation()
157 {
158   var pn = window.location.pathname + window.location.search;
159   for (prop in pdbPathNames)
160   {
161     if (pn.indexOf(prop) >= 0)
162     {
163           var docObj;
164           docObj = findObj(pdbPathNames[prop]);
165           docObj.forceOpeningOfAncestorFolders();
166           highlightObjLink(docObj);
167           break;
168     }
169   }
170 }
171
172
173
174 function addHomeTree()
175 {
176
177 }
178
179 function addSearchTree()
180 {
181
182 }
183
184
185
186 function addExploreTree()
187 {
188
189            fSub = myInsDoc('<b>1CY0<\/b>',f,'S','/pdb/explore.do?structureId=1CY0');
190
191    fSub = myInsFolder('Download Files',f,'');
192    fSubSub = myInsDoc('PDB File',fSub,'S','/pdb/download/downloadFile.do?fileFormat=pdb&amp;compression=NO&amp;structureId=1CY0');
193    fSubSub = myInsDoc('PDB gz',fSub,'S','/pdb/files/1cy0.pdb.gz');
194    fSubSub = myInsDoc('PDB File (Header)',fSub,'S','/pdb/download/downloadFile.do?fileFormat=pdb&amp;headerOnly=YES&amp;structureId=1CY0');
195    fSubSub = myInsDoc('mmCIF File',fSub,'S','/pdb/download/downloadFile.do?fileFormat=cif&amp;compression=NO&amp;structureId=1CY0');
196    fSubSub = myInsDoc('mmCIF gz',fSub,'S','/pdb/files/1cy0.cif.gz');
197    fSubSub = myInsDoc('mmCIF File (Header)',fSub,'S','/pdb/download/downloadFile.do?fileFormat=cif&amp;headerOnly=YES&amp;structureId=1CY0');
198    fSubSub = myInsDoc('PDBML/XML File',fSub,'S','/pdb/download/downloadFile.do?fileFormat=xml&amp;compression=NO&amp;structureId=1CY0');
199    fSubSub = myInsDoc('PDBML/XML gz',fSub,'S','/pdb/files/1cy0.xml.gz');
200    fSubSub = myInsDoc('PDBML/XML File (Header)',fSub,'S','/pdb/download/downloadFile.do?fileFormat=xml&amp;headerOnly=YES&amp;structureId=1CY0');
201
202        
203                fSubSub = myInsDoc('Biological Unit Coordinates',fSub,'S','/pdb/files/1cy0.pdb1.gz');
204        
205            fSub = myInsDoc('FASTA Sequence',f,'S','/pdb/download/downloadFile.do?fileFormat=FASTA&amp;compression=NO&amp;structureId=1CY0');
206
207    fSub = myInsFolder('Display Files',f,'');
208
209            fSubSub = myInsDoc('Custom Structure Summary',fSub,'S','/pdb/explore/customStructureReportForm.do?exptype=misc');
210
211    fSubSub = myInsDoc('PDB File',fSub,'B','/pdb/files/1cy0.pdb');
212    fSubSub = myInsDoc('PDB File (Header)',fSub,'B','/pdb/files/1cy0.pdb?headerOnly=YES');
213    fSubSub = myInsDoc('mmCIF File',fSub,'B','/pdb/files/1cy0.cif');
214    fSubSub = myInsDoc('mmCIF File (Header)',fSub,'B','/pdb/files/1cy0.cif?headerOnly=YES');
215    fSubSub = myInsDoc('PDBML/XML File',fSub,'B','/pdb/files/1cy0.xml');
216    fSubSub = myInsDoc('PDBML/XML (Header)',fSub,'B','/pdb/files/1cy0.xml?headerOnly=YES');
217
218            fSub = myInsFolder('Display Molecule',f,'');
219            fSubSub = myInsDoc('Image Gallery',fSub,'S','/pdb/explore/images.do?structureId=1CY0');
220            fSubSub = myInsDoc('KiNG Viewer',fSub,'S','/pdb/static.do?p=explorer/viewers/king.jsp');
221            fSubSub = myInsDoc('Jmol Viewer',fSub,'S','/pdb/static.do?p=explorer/viewers/jmol.jsp');
222            fSubSub = myInsDoc('WebMol Viewer',fSub,'S','/pdb/static.do?p=explorer/viewers/webmol.jsp');
223            fSubSub = myInsDoc('Protein Workshop',fSub,'S','/pdb/Viewers/ProteinWorkshop/protein_workshop_launch.jsp');
224            fSubSub = myInsDoc('Rasmol Viewer <br>(<b>Plugin required</b>)',fSub,'B','/pdb/download/downloadFile.do?fileFormat=PDB&amp;display=rasmol&amp;compression=NO&amp;structureId=1CY0');
225            fSubSub = myInsDoc('Swiss-PDB Viewer <br>(<b>Plugin required</b>)',fSub,'B','/pdb/download/downloadFile.do?fileFormat=PDB&amp;display=spdbv&amp;compression=NO&amp;structureId=1CY0');
226            fSubSub = myInsDoc('KiNG Help',fSub,'S','javascript:callHelp(roboLoc,\'viewers/king.htm\');',ICONPATH + 'question.gif');
227            fSubSub = myInsDoc('Jmol Help',fSub,'S','javascript:callHelp(roboLoc,\'viewers/jmol.htm\');',ICONPATH + 'question.gif');
228            fSubSub = myInsDoc('WebMol Help',fSub,'S','javascript:callHelp(roboLoc,\'viewers/webmol.htm\');',ICONPATH + 'question.gif');
229            fSubSub = myInsDoc('Protein Workshop Help',fSub,'S','javascript:callHelp(roboLoc,\'viewers/proteinworkshop.htm\');',ICONPATH + 'question.gif');
230            fSubSub = myInsDoc('QuickPDB',fSub,'S','javascript:callHelp(roboLoc,\'viewers/thequickpdb.htm\');',ICONPATH + 'question.gif');
231            
232            
233              fSubSub = myInsDoc('Asymmetric Unit /<br> Biological Molecule',fSub,'S','/pdb/static.do?p=explorer/singleimage.jsp&amp;structureId=1cy0&amp;type=asym&amp;size=500');
234            
235            fSub = myInsDoc('<b>Structural Reports</b> ',f,'S','/pdb/explore/biologyAndChemistry.do');
236            fSub = myInsDoc('<b>External Links</b> ',f,'S','/pdb/explore/externalReferences.do');
237            
238            fSub = myInsFolder('Structure Analysis ',f,'');
239            fSubSub = myInsFolder('Geometry ',fSub,'');     
240            
241            
242              fSubSubSub = myInsDoc('RCSB Graphics',fSubSub,'S','/pdb/explore/geometryGraph.do?structureId=1CY0');
243              fSubSubSub = myInsDoc('RCSB Tables',fSubSub,'S','/pdb/explore/geometryDisplay.do');
244            
245              fSubSubSub = myInsDoc('MolProbity Ramachandran Plot',fSubSub,'B','/pdb/images/1CY0_ram_m_500.pdf');
246
247                  
248                                         fSubSub = myInsFolder('Sequence Variants  ',fSub,'');
249                  
250                                                 fSubSubSub = myInsDoc('TOP1_ECOLI Variants',fSubSub,'S','/pdb/search/smartSubquery.do?structureId=1CY0&variant=1&smartSearchSubtype=StructureVariantQuery&spId=TOP1_ECOLI');
251                  
252                                                 fSubSubSub = myInsDoc('TOP1_ECOLI Non-Variants',fSubSub,'S','/pdb/search/smartSubquery.do?structureId=1CY0&variant=0&smartSearchSubtype=StructureVariantQuery&spId=TOP1_ECOLI');
253                    
254            fSub = myInsFolder('Help ',f,'');
255            fSubSub = myInsDoc('Structure Explorer Intro',fSub,'S','javascript:callHelp(roboLoc,\'structure_explorer/introduction_to_structure_explorer.htm\');',ICONPATH + 'question.gif');
256            fSubSub = myInsDoc('Molecular Viewers',fSub,'S','javascript:callHelp(roboLoc,\'molecular_viewers/introduction_to_molecular_viewers.htm\');',ICONPATH + 'question.gif');
257            fSubSub = myInsDoc('Structure Summary',fSub,'S','javascript:callHelp(roboLoc,\'structure_explorer/summary_information.htm\');',ICONPATH + 'question.gif');
258            fSubSub = myInsDoc('Biological Molecule',fSub,'S','javascript:callHelp(roboLoc,\'data_download/biological_unit/biological_unit_introduction.htm\');',ICONPATH + 'question.gif');
259            fSubSub = myInsDoc('Biology &amp; Chemistry',fSub,'S','javascript:callHelp(roboLoc,\'quick_links/quick_links_biology_and_chemistry.htm\');',ICONPATH + 'question.gif');
260            fSubSub = myInsDoc('Sequence Details',fSub,'S','javascript:callHelp(roboLoc,\'quick_links/quick_links_sequence_details.htm\');',ICONPATH + 'question.gif');
261            fSubSub = myInsDoc('Structural Features',fSub,'S','javascript:callHelp(roboLoc,\'quick_links/quick_links.structural_features.htm\');',ICONPATH + 'question.gif');
262            fSubSub = myInsDoc('Materials &amp; Methods (X-Ray)',fSub,'S','javascript:callHelp(roboLoc,\'quick_links/materials_and_methods/x-ray_materials_and_methods.htm\');',ICONPATH + 'question.gif');
263            fSubSub = myInsDoc('Materials &amp; Methods (NMR)',fSub,'S','javascript:callHelp(roboLoc,\'quick_links/materials_and_methods/nmr_materials_and_methods.htm\');',ICONPATH + 'question.gif');
264        
265
266 }
267
268 function addReportTree()
269 {
270
271   fSub = myInsDoc('<b>Show Query Details<\/b>',f,'S','/pdb/static.do?p=results/queryDesc.jsp');
272   fSub = myInsDoc('Results Help',f,'S','javascript:callHelp(\'/robohelp/\',\'query_results_browser/introduction_to_results_browser.htm\');',ICONPATH + 'question.gif'); 
273  
274 }
275
276 function addQueriesTree()
277 {
278    fSub = myInsDoc('Clear queries',f,'S','/pdb/queries/clearqueries.do');
279 }
280
281 </script>
282
283                 <script type="text/javascript" language="JavaScript">
284                   var roboLoc = "/robohelp/";
285                 </script>
286         </head>
287
288   <body bgcolor="#ffffff" style="margin:10px 10px 10px 10px;" onload="liveSearchInit();pdbBodyLoadInit();">
289
290
291 <!-- Universal retriever iFrame - for debugging it's a good idea to make the height 60 or so and frameborder set to one -->
292 <iframe src="/pdb/browse/menu_empty.html" name="retriever" id="retriever" width="100%" height="1" frameborder="0">
293   <!-- This Page requires a modern browser supporting IFRAMES or ILAYERS --><p>&nbsp;</p>
294 </iframe>
295
296 <!-- MAIN TABLE -->
297 <table border="0" cellpadding="0" cellspacing="0" align="center" ><!-- for debugging use border -->
298 <tbody>
299
300  <tr>
301   <td colspan="3">   
302
303  
304
305
306
307
308 <script language="Javascript" type="text/javascript">
309 function checkSequenceLength()
310 {
311 if ((document.headerQueryForm.inputQuickSearch.value.substr(0,9) == 'sequence:')&&(document.headerQueryForm.inputQuickSearch.value.length < 15))
312 {
313         alert('Sequence searches must be at least 6 characters long');
314         return false;
315 }
316 else
317         return liveSearchSubmit();
318        
319 }
320
321 </script>
322 <form action="/pdb/search/navbarsearch.do" onSubmit="return checkSequenceLength();" method="get" name="headerQueryForm">   
323    <table border="0" cellpadding="0" cellspacing="0" width="100%">   
324     <tr>
325      <td width="210"><a href="/pdb"><img alt="RCSB PDB Protein Data Bank | Home" title="RCSB PDB Protein Data Bank | Home" src="/pdb/skins/pastel/images/header/pdblogo.gif" width="198" border="0"></a></td>
326      <td align="right" class="headertop" colspan="2">
327       <a href="http://www.wwpdb.org/" target="_blank"><img alt="A Member of the wwPDB" title="A Member of the wwPDB" src="/pdb/skins/pastel/images/header/wwpdb.gif" width="191" height="15" border="0"></a><br/>
328       An Information Portal to Biological Macromolecular Structures
329       <br/>
330       <span class="body">
331         As of <a href="/pdb/search/smartSubquery.do?smartSearchSubtype=LastLoadQuery">Tuesday Dec 26, 2006</a>&nbsp;<a href="/pdb/rss/LastLoad" title="RSS Feed for the Latest Released Structures"><img alt="RSS Feed for the Latest Released Structures" border="0" src="/pdbstatic/images/feed-icon16x16.png"></a>
332         there are 40870 Structures
333         <a href="javascript:callHelp(roboLoc, 'search_database/latest_release/introduction_to_latest_release.htm');"><img alt="&nbsp;&nbsp;? " title="&nbsp;&nbsp;? " src="/pdb/skins/pastel/images/tree/question.gif" border="0" style="vertical-align:middle;"></a>
334         &nbsp;|&nbsp;
335         <a href="/pdb/static.do?p=general_information/pdb_statistics/index.html&amp">PDB Statistics</a><a href="javascript:callHelp(roboLoc, 'web_site/pdb_statistics.htm');"><img alt="&nbsp;&nbsp;? " title="&nbsp;&nbsp;? " src="/pdb/skins/pastel/images/tree/question.gif" border="0" style="vertical-align:middle;"></a>
336       </span>
337      </td>
338     </tr>
339
340     <tr>
341      <td style="vertical-align:top;padding-top:0.2em;">
342       <a href="/pdb/static.do?p=general_information/about_pdb/contact/index.html" class="tabblutxt">Contact Us</a> |
343       <a href="javascript:callHelp(roboLoc,'site_navigation/introduction_to_site_navigation.htm');" class="tabblutxt">Help</a> |
344       <a href="javascript:printPage('http://www.rcsb.org/pdb/explore/explore.do?structureId=1CY0');" class="tabblutxt">Print Page</a>
345      </td>
346      <td style="background-image:url('/pdb/skins/pastel/images/header/header_bg2.jpg');background-repeat: repeat-x;vertical-align:top;background-position: 0% 100%;" width="37" rowspan="2"><img src="/pdbstatic/images/spacer.gif" alt="" width="37" height="35" border="0"></td>
347
348
349      <td nowrap style="background-image:url('/pdb/skins/pastel/images/header/header_bg3.jpg');background-repeat: repeat-x;background-color:#3333FF;background-position: 0% 100%;padding-right:2px;" class="topnavlinks" rowspan="2">
350
351         <table>
352                 <tr>
353                         <td nowrap class="topnavlinks">
354                                 <input type="hidden" name="newSearch" value="yes">
355                                 <input type="hidden" name="isAuthorSearch" value="no"  >
356                                 <input type="radio" name="radioset" value="All"  checked  class="radio" onclick="javascript:document.headerQueryForm.isAuthorSearch.value='no';">PDB ID or keyword
357                                 <input type="radio" name="radioset" value="Authors"  class="radio" onclick="javascript:if(this.checked){document.headerQueryForm.isAuthorSearch.value='yes';}">Author
358                         </td>
359                         <td class="topnavlinks" nowrap>
360                                 <input type="text"  class="input" size="10" name="inputQuickSearch" id="livesearch" onkeypress="liveSearchStart()" onblur="setTimeout('closeResults()',10000);" >
361                                 <div id="LSResult" style="display: none;"><div id="LSShadow"></div></div>
362                         </td>
363                         <td class="topnavlinks" nowrap>
364                         <input type="image" src="/pdb/skins/pastel/images/header/header_search.gif" size="61" value="Search" class="text" align="middle" name="image" alt="Search" title="Search">
365                     <a href="javascript:callHelp(roboLoc, 'search_database/top_bar_search/top_bar_search.htm');"><img alt="&nbsp;&nbsp;? " title="&nbsp;&nbsp;? " src="/pdb/skins/pastel/images/header/mainquestion.gif" border="0" style="vertical-align:middle;"></a>
366                                         | <a href="/pdb/search/advSearch.do" class="textwhite">Advanced Search</a>   
367                         </td>
368                 </tr>
369         </table>         
370
371      </td>
372     </tr>
373  
374     <tr>
375      <td style="background-image:url('/pdb/skins/pastel/images/header/header_bg1.jpg');text-align:left;background-repeat: repeat-x;vertical-align:top;background-position: 0% 100%;"><img src="/pdbstatic/images/spacer.gif" alt="" width="37" height="13" border="0"></td>
376     </tr>     
377    </table>
378 </form> 
379
380
381
382
383
384
385
386
387   </td>
388  </tr>
389
390  <tr valign="top">
391   <td  class="navbackground" style="width:230px;">
392
393  
394
395
396
397
398
399                
400                
401    <table border="0" cellpadding="0" cellspacing="0">
402     <tr>
403      <td class="navbackground">
404                 <div id="hdrtab">
405                   <ul>
406                     <li><a href="/pdb/Welcome.do">Home</a></li>
407                     <li><a href="/pdb/static.do?p=search/index.html">Search</a></li>
408                    
409                     <li id="current"><a>Structure</a></li>
410                    
411                   </ul>
412                 </div>
413           </td>
414     </tr>
415     <tr>
416      <td class="navbackground">
417                 <a style="font-size:1pt;text-decoration:none;color:#FFFF99" href="http://www.treemenu.net/" target=_blank></a>
418                 <span class="TreeviewSpanArea">
419                 <script type="text/javascript" language="JavaScript">
420                         foldersTree = gFld('', '');
421                         foldersTree.treeID = 'ExploreRoot';
422                         foldersTree.xID = foldersTree.treeID;
423                         f = foldersTree;
424                         addExploreTree();
425                         initializeDocument();
426                         checkLocation();
427                 </script>
428
429                 </span>
430       </td>
431     </tr>
432     <tr>
433      <td class="homewhite" height="1"><img src="/pdbstatic/images/spacer.gif" alt="" width="1" height="1" border="0"></td>
434     </tr>
435    </table>
436
437
438
439
440
441   </td>
442
443   <td valign="top" style="vertical-align:top;">  <!-- BEGIN MAIN RIGHT -->
444   <table border="0" cellpadding="0" cellspacing="0">
445    <tr>
446      <td valign="top" class="maintd">
447         <noscript><div style="padding-left:8px;font-weight:bold;color:#FF0000;font-size:16px">This browser is either not Javascript enabled or has it turned off.<br />This site will not function correctly without Javascript.</div></noscript>
448          
449        
450
451
452
453
454
455
456
457 <div id="hdrtab">
458                   <ul>
459
460                            <li><a href="/pdb/explore.do?structureId=1CY0">Structure Summary</a></li>
461                            
462                            <li><a href="/pdb/explore/biologyAndChemistry.do?structureId=1CY0">Biology & Chemistry</a></li>
463                            
464                            <li><a href="/pdb/explore/materialsAndMethods.do?structureId=1CY0">Materials & Methods</a></li>
465                            
466                            <li><a href="/pdb/explore/sequence.do?structureId=1CY0">Sequence Details</a></li>
467                            
468                            <li><a href="/pdb/explore/geometryDisplay.do?structureId=1CY0">Geometry</a></li>
469                            
470                   </ul>
471                 </div>
472      </td>
473    </tr>
474
475    <tr>
476     <td>
477      <font color="RED"><b>LIMITED FUNCTIONALITY BROWSER</b></font>
478      <br />
479      <span class="static_header">
480        You are currently using a browser that is not fully compatible with this website.<br />
481        If you would like to have full functionality, please upgrade your browser. <br />
482        
483                                                                        
484         <a href="/pdb/static.do?p=home/faq.html#goodBrowsers">List of Supported & Unsupported Browsers for the RCSB Protein Data Bank Web Site</a> <br />
485
486        <br />
487      </span>
488     </td>
489    </tr>
490
491    <tr>
492     <td valign="top" class="maintd" style="padding-left:10px;">
493
494
495  
496
497
498
499  
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535 <!---->
536
537
538
539
540
541 <script type="text/javascript" language="JavaScript">
542         
543                 function searchCitationAuthor(author, primary)
544                 {
545                                 document.queryCitationAuthorForm.elements['citation_author.name.value'].value = author;
546                                 document.queryCitationAuthorForm.elements['citation_author.name.comparator'].value = "equals";
547                                 if (primary)
548                                         document.queryCitationAuthorForm.elements['citation_author.citation_id.value'].value="primary";
549                                 document.queryCitationAuthorForm.submit();
550                 }
551
552                 function searchStructureAuthor(author)
553                 {
554                                 document.queryStructureAuthorForm.elements['authName'].value = author;
555                                 document.queryStructureAuthorForm.submit();
556                 }
557                
558                 function searchPdbxKeyword(keyword)
559                 {
560                   document.queryKeywordsForm.elements['struct_keywords.pdbx_keywords.value'].value = keyword;
561                   document.queryKeywordsForm.submit();
562                 }
563                
564                 function searchLigand(id)
565                 {
566                                 document.ligandQueryForm.elements['chem_comp.id.value'].value = id;
567                                 document.ligandQueryForm.elements['chem_comp.id.comparator'].value = "=";
568                                 document.ligandQueryForm.submit();
569                 }
570                 function searchEc(ec)
571                 {
572                                 document.ecQueryForm.elements['entity.pdbx_ec.value'].value = ec;
573                                 document.ecQueryForm.elements['entity.pdbx_ec.comparator'].value = "=";
574                                 document.ecQueryForm.submit();
575                 }
576                 function searchSpaceGroup(sg)
577                 {
578                         document.sgQueryForm.elements['symmetry.space_group_name_H-M.value'].value = sg;
579                         document.sgQueryForm.elements['symmetry.space_group_name_H-M.comparator'].value = "=";
580                         document.sgQueryForm.submit();
581                 }
582                
583                 function searchSource(natSci, natCommon, genSci, genCommon)
584                 {
585                         document.srcQueryForm.elements['entity_src_nat.pdbx_organism_scientific.value'].value = natSci;
586                         document.srcQueryForm.elements['entity_src_nat.common_name.value'].value = natCommon;
587                         document.srcQueryForm.elements['entity_src_gen.pdbx_gene_src_scientific_name.value'].value = genSci;
588                         document.srcQueryForm.elements['entity_src_gen.gene_src_common_name.value'].value = genCommon;
589                         document.srcQueryForm.submit();
590                 }
591                
592         </script>
593                                        
594
595
596
597 <script type="text/javascript" language="JavaScript">
598
599         function queryByExample(s, t)
600         {
601         }
602         function FW_startTimeout()
603         {
604         }
605         function removeHighlight()
606         {
607         }
608
609 </script>
610
611         <table cellpadding="2" cellspacing="2" border="0" style="text-align: left; width: 100%;">
612           <tbody>
613                 <tr>
614                   <td style="vertical-align: top;">
615                          <table cellpadding="5" cellspacing="2" border="0" style="text-align: left; width: 100%;">
616                            <tbody>
617                                  <tr>
618                                    <td>
619                 <!--div id="hdrtab">
620                   <ul>
621                     <li id="current"><a href="#">Structure Summary</a></li>
622                     <li><a href="/pdb/explore/biologyAndChemistry.do">Biology & Chemistry</a></li>
623                     <li><a href="/pdb/explore/materialsAndMethods.do">Materials & Methods</a></li>
624                     <li><a href="/pdb/explore/sequence.do">Sequence Details</a></li>
625                     <li><a href="/pdb/explore/geometryDisplay.do">Geometry</a></li>
626                   </ul>
627                 </div-->
628     </td>
629                                    <td style="text-align: right;">
630        
631                                        
632
633
634                                                 <span class="se_pagetitle">1CY0&nbsp;</span>
635                                                 <a href="/pdb/files/1cy0.pdb" title="Display PDB file" target="_blank"><img src="/pdbstatic/images/summary.jpg" alt="Display PDB file" border='0'></a>
636
637                                    </td>
638                                  </tr>
639                            </tbody>
640                          </table>
641                          <table cellpadding="5" cellspacing="5" border="0"  style="text-align: left; width: 100%;">
642                           <tbody>
643                                 <tr class="se_back1">
644                                         <td width="15%" align="right" valign="top" class="se_item">
645                                                 Title
646                                         </td>
647                                         <td width="85%" valign="top" class="se_datalarge1">
648                                                        
649                                                         COMPLEX OF E.COLI DNA TOPOISOMERASE I WITH 3'-5'-ADENOSINE DIPHOSPHATE
650                                                        
651                                                         <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="StructureTitle">
652                                         </td>
653                                 </tr>
654                                 <tr class="se_back2">
655                                         <td width="15%" align="right" class="se_item">
656                                                 Authors
657                                         </td>
658                                         <td width="85%" >
659                                          <form action="/pdb/search/smartSubquery.do" method="get" name="queryStructureAuthorForm">
660                                                  <input type="hidden" name="authName" value="">
661                                                  <input type="hidden" name="smartSearchSubtype" value="AuthorQuery">
662                                                  <input type="hidden" name="display" value="true">
663                                                   <a class="seAuthors" onmouseover="this.className='seAuthorsHover';" onmouseout="this.className='seAuthors';" onclick="searchStructureAuthor('Feinberg, H.');">Feinberg, H.</a>,&nbsp;&nbsp;<a class="seAuthors" onmouseover="this.className='seAuthorsHover';" onmouseout="this.className='seAuthors';" onclick="searchStructureAuthor('Changela, A.');">Changela, A.</a>,&nbsp;&nbsp;<a class="seAuthors" onmouseover="this.className='seAuthorsHover';" onmouseout="this.className='seAuthors';" onclick="searchStructureAuthor('Mondragon, A.');">Mondragon, A.</a>
664                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="StructureExperimentAuthors">
665                                         </form>
666                                         </td>
667                                 </tr>
668                                 <tr class="se_back1">
669                                         <td width="15%" align="right" class="se_item">
670                                          <form action="/pdb/search/smartSubquery.do" method="get" name="queryCitationAuthorForm">
671                                                  <input type="hidden" name="citation_author.name.value" value="">
672                                                  <input type="hidden" name="citation_author.name.comparator" value="contains">
673                                                  <input type="hidden" name="citation_author.citation_id.value" value="">
674                                                  <input type="hidden" name="smartSearchSubtype" value="CitationAuthorQuery">
675                                                  <input type="hidden" name="display" value="true">
676                                         </form>
677                                                 Primary Citation&nbsp;&nbsp;
678                                         </td>
679                                         <td width="85%" class="se_datasmall1">
680                                                 <a class="sePrimarycitations" onmouseover="this.className='sePrimarycitationsHover';" onmouseout="this.className='sePrimarycitations';" onclick="searchCitationAuthor('Feinberg, H.', true);">Feinberg, H.</a>,&nbsp;&nbsp;<a class="sePrimarycitations" onmouseover="this.className='sePrimarycitationsHover';" onmouseout="this.className='sePrimarycitations';" onclick="searchCitationAuthor('Changela, A.', true);">Changela, A.</a>,&nbsp;&nbsp;<a class="sePrimarycitations" onmouseover="this.className='sePrimarycitationsHover';" onmouseout="this.className='sePrimarycitations';" onclick="searchCitationAuthor('Mondragon, A.', true);">Mondragon, A.</a>
681                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="CitationArticleAuthors">
682                                                
683                                                 Protein-nucleotide interactions in E. coli DNA topoisomerase I.
684                                                
685                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="CitationArticleTitle">
686                                                
687                                                 <span class="se_journal">Nat.Struct.Biol.</span>
688                                                
689
690                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="CitationJournalName">
691                                                
692                                                 <span class="se_journal">v6</span>
693                                                
694                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="CitationJournalVolume">
695                                                
696                                                 <span class="se_journal">pp.961-968</span>
697                                                
698                                                 ,
699                                                         <span class="se_journal">1999</span>
700                                                
701
702                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="CitationJournalYear">
703                                                        
704                                                
705                                                 <br>
706                                                 [&nbsp;<a class="se_datalarge" href="/pdb/explore/pubmed.do?structureId=1CY0" name="Abstract" ALT="View Searchable PubMed Abstract" TITLE="View Searchable PubMed Abstract">Abstract</a>&nbsp;]&nbsp;&nbsp;&nbsp;
707                                 <a class="se_datalarge" href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&amp;db=PubMed&amp;dopt=Abstract&amp;list_uids=10504732" target="resource_window">
708                                         <img src="/pdbstatic/explorer/images/abstract_icon.gif" BORDER="0" ALT="View PubMed Abstract at NCBI" TITLE="View PubMed Abstract at NCBI">
709                                                
710                                         </td>
711                                 </tr>
712                                 <tr class="se_back2">
713                                         <td width="15%" align="right" class="se_item">
714                                                 History
715                                         </td>
716                                         <td width="85%" class="se_datalarge2">
717                                                 <span class="se_subitem">Deposition&nbsp;&nbsp;</span>
718                                                 1999-08-31
719                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="DepositionDate">
720                                                        
721                                                 <span class="se_subitem">&nbsp;Release&nbsp;</span>
722                                                 2000-03-08
723                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="ReleaseDate">
724
725                                          </td>
726                                 </tr>
727                                 <tr class="se_back1">
728                                         <td width="15%" align="right" class="se_item">
729                                                 Experimental Method
730                                         </td>
731                                         <td width="85%" class="se_datalarge1">
732                                                 <span class="se_subitem">Type&nbsp;&nbsp;</span>
733                                                 X-RAY DIFFRACTION
734                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="">
735                                                 <span class="se_subitem">Data</span>
736
737
738                 <a class="se_datasmall1">N/A</a>
739        
740
741
742                                                 </td></tr>
743
744
745                 <tr class="se_back2">
746                         <td width="15%" align="right" class="se_item">
747                                 Parameters&nbsp;&nbsp;
748                         </td>
749                         <td width="85%" class="se_datalarge2">
750                        
751                          <form action="/pdb/search/smartSubquery.do" method="get" name="sgQueryForm">
752                                  <input type="hidden" name="smartSearchSubtype" value="SpaceGroupQuery">
753                                  <input type="hidden" name="display" value="true">
754                                 <input type="hidden" name="symmetry.space_group_name_H-M.value" value="">
755                                 <input type="hidden" name="symmetry.space_group_name_H-M.comparator" value="=">
756                        
757                                 <table border="0" cellpadding="1" cellspacing="1" width="100%">
758                                         <tr class="se_subitem2">
759                                                 <td width="20%">
760                                                         Resolution[&Aring;]
761                                                     <a href='/pdb/statistics/histogram.do?mdcat=refine&amp;mditem=ls_d_res_high&amp;minLabel=0&amp;maxLabel=5&amp;numOfbars=10'>
762                                                       <img src='/pdbstatic/images/histogram16x16.jpg' border=0 title='View a histogram of Resolution' alt='View a histogram of Resolution'>
763                                                     </a>
764                                                 </td>
765                                                 <td width="20%">
766                                                         R-Value
767                                                 </td>
768                                                 <td width="20%">
769                                                         R-Free
770                                                 <td width="25%">
771                                                         Space Group
772                                                 </td>
773                                         </tr>
774                                         <tr class="se_subitem2">
775                                             <td width="20%" class="se_datalarge2">
776                                                            2.45
777                                                         <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="Resolution">
778                                                 </td>
779                                                 <td width="20%" align="left" class="se_datalarge2">
780                                                         0.220
781                                                         (obs.)
782                                                 </td>
783                                                 <td width="20%" align="left" class="se_datalarge2">
784                                                         0.274
785                                                        
786                                                 </td>
787                                                 <td width="25%" class="se_datalarge2">
788                                                                
789                                                   <a class="se_datasmall" onclick="searchSpaceGroup('P 21 21 21');" onmouseover="this.className='se_datasmallHover'" onmouseout="this.className='se_datasmall'">
790                                                   P 2<sub>1</sub> 2<sub>1</sub> 2<sub>1</sub>
791                                                   </a>
792                                                            
793                                                         <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="SpaceGroup">
794                                                 </td>
795                                    </tr>
796                         </table>
797                         </form>
798                         </td>
799                 </tr>
800                 <tr class="se_back1">
801                         <td width="15%" align="right" class="se_item">
802                                 Unit Cell&nbsp;&nbsp;
803                         </td>
804                         <td width="85%" class="se_datalarge1" valign="top">
805                                 <table border="0" cellpadding="1" cellspacing="1"  width="100%">
806                                         <tr class="se_subitem1">
807                                                 <td width="20%">
808                                                         <span class="se_unitcellitem">Length [&Aring;]</span>
809                                                 </td>
810                                                 <td width="13%">
811                                                         a
812                                                 </td>
813                                                 <td width="13%" >
814                                                        
815                                                         <span class="se_datasmall1">63.04</span>
816                                                        
817                                                 </td>
818                                                 <td width="13%" >
819                                                         b
820                                                 </td>
821                                                 <td width="13%">
822                                                        
823                                                         <span class="se_datasmall1">73.31</span>
824                                                        
825                                                 </td>
826                                                 <td width="13%" >
827                                                         c
828                                                 </td>
829                                                 <td width="15%" >
830                                                        
831                                                         <span class="se_datasmall1">134.46</span>
832                                                        
833                                                 </td>
834                                         </tr>
835                                         <tr class="se_subitem1">
836                                                 <td width="20%">
837                                                         <span class="se_unitcellitem">Angles [&#176;]</span>
838                                                 </td>
839                                                 <td width="13%" >
840                                                         alpha
841                                                 </td>
842                                                 <td width="13%" >
843                                                        
844                                                         <span class="se_datasmall1">90.00</span>&nbsp;
845                                                        
846                                                 </td>
847                                                 <td width="13%">
848                                                         beta
849                                                 </td>
850                                                 <td width="13%" >
851                                                        
852                                                         <span class="se_datasmall1">90.00</span>&nbsp;
853                                                        
854                                                 </td>
855                                                 <td width="13%" >
856                                                         gamma
857                                                 </td>
858                                                 <td width="15%" >
859                                                        
860                                                         <span class="se_datasmall1">90.00</span>&nbsp;
861                                                        
862                                                 </td>
863                                         </tr>
864                                 </table>
865                         </td>
866                 </tr>
867
868                
869             <tr class="se_back2">
870                         <td width="15%" align="right" valign="top" class="se_item">
871                                 Molecular Description  Asymmetric Unit
872                         </td>
873                         <td width="85%" class="se_datalarge2" valign="top">
874                        
875                          <form action="/pdb/search/smartSubquery.do" method="get" name="ecQueryForm">
876                                  <input type="hidden" name="smartSearchSubtype" value="EnzymeClassificationQuery">
877                                  <input type="hidden" name="display" value="true">
878                                 <input type="hidden" name="entity.pdbx_ec.value" value="">
879                                 <input type="hidden" name="entity.pdbx_ec.comparator" value="=">
880
881                                 <!-- %=org.pdb.util.middle.MoleculeDescription.getDescription(myStructureId)%><br-->
882
883        
884                                 <span class="se_subitem"><br>Polymer:</span>&nbsp;1&nbsp;&nbsp;
885 <span class="se_subitem">Molecule:</span>&nbsp;DNA TOPOISOMERASE I&nbsp;&nbsp;
886 <span class="se_subitem">Fragment:</span>&nbsp;67 KDA N-TERMINAL FRAGMENT OF E.COLI TOPOISOMERASE I&nbsp;&nbsp;
887 <span class="se_subitem">Chains:</span>&nbsp;A&nbsp;&nbsp;
888 <span class="se_subitem">EC no.:</span>&nbsp;<a class="se_datasmall" onclick="searchEc('5.99.1.2');" onmouseover="this.className='se_datasmallHover'" onmouseout="this.className='se_datasmall'">5.99.1.2</a>&nbsp;<a class="se_datalarge" href="http://www.chem.qmul.ac.uk/iubmb/enzyme/EC5/99/1/2.html" target="resource_window"><img src="/pdbstatic/explorer/images/iubmb_icon.gif" BORDER="0" ALT="Go to IUBMB EC entry" TITLE="Go to IUBMB EC entry"></a>&nbsp;
889 &nbsp;
890                         </form>                                 
891
892                        
893                                          <form action="/pdb/search/smartSubquery.do" method="get" name="queryKeywordsForm">
894                                                  <input type="hidden" name="smartSearchSubtype" value="StructureKeywordsQuery">
895                                                  <input type="hidden" name="display" value="true">
896                                                  <input type="hidden" name="struct_keywords.pdbx_keywords.value" value="">
897                                                  <input type="hidden" name="struct_keywords.pdbx_keywords.comparator" value="startswith">
898                                         </form>
899
900
901                       <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="Compound">
902                                         </td>
903                                 </tr>
904                                 <tr class="se_back1">
905                                         <td width="15%" align="right" valign="top" class="se_item">
906                                                 Classification
907                                         </td>
908                                         <td width="40%" class="se_datalarge1">
909                                                
910                                                 <span class="qrb_value">
911                                                   <a class="sePrimarycitations" onmouseover="this.className='sePrimarycitationsHover';" onmouseout="this.className='sePrimarycitations';" onclick="searchPdbxKeyword('ISOMERASE');">
912                                                     Isomerase
913                                                   </a>
914                                                 </span>
915                                                
916                                                 <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="StructureSummary">
917
918                                         </td>
919                                 </tr>
920                         </tbody>
921                   </table>
922                   </td>
923                   <td style="vertical-align: top; text-align: right; width: 250px;">
924                                
925
926
927
928
929
930
931
932
933
934
935 <script type="text/javascript" language="JavaScript">
936         
937         var isImage = "bio";
938         function switchStructureImage()
939         {
940                 if (isImage == "bio") {
941                         document.switchImageForm.structureImageButton.value = "Asymmetric Unit";
942                         document.switchImageForm.structureImage.src = '/pdb/images/1cy0_asym_r_250.jpg';
943                         isImage = "asym";
944                 }
945                 else {
946                         document.switchImageForm.structureImageButton.value = "Biological Molecule";
947                         //structureImageText.innerText = "Biological Molecule";
948                         document.switchImageForm.structureImage.src = '/pdb/images/1cy0_bio_r_250.jpg';
949                         isImage = "bio";
950                 }
951         }
952 </script>
953
954
955
956 <form name="switchImageForm" action="">
957
958
959 <table border="0" cellpadding="0" cellspacing="0" width="256">
960         <tr class="homedark">
961                 <td class="putih4">&nbsp;Images and Visualization</td>
962                 <td align="right" valign="top"><img
963                         src="/pdbstatic/home/tablecorner.gif" alt="" border="0"></td>
964         </tr>
965         <tr>
966                 <td colspan="2">
967                 <table border="0" cellpadding="1" cellspacing="0" width="100%"
968                         class="homedark">
969                         <tr>
970                                 <td>
971                                 <table border="0" cellpadding="1" cellspacing="0" width="100%"
972                                         class="homewhite">
973
974
975
976
977                                         <!--table border="0" width="250" align="right" bgcolor="#9999cc"-->
978                                        
979                                         <tr>
980                                                 <td align="center"><font size="2" face="Arial, Arial, Helvetica">Biological
981                                                 Molecule / Asymmetric Unit</font></td>
982                                         </tr>
983                                         <tr>
984                                                 <td align="center"><img id="structureImage" name="structureImage"
985                                                         src="/pdb/images/1cy0_asym_r_250.jpg" border="0"
986                                                         alt="Biological Molecule / Asymmetric Unit Image for 1CY0">
987                                                 </td>
988                                         </tr>
989                                        
990                                         <tr>
991                                                 <td align="center" class="it_item">
992                                                 <table>
993                                                         <tr>
994                                                                 <td class="it_item" colspan="4" align="center">Display Options</td>
995                                                         </tr>
996                                                         <tr>
997                                                                 <td align="center"><a class="imagetable"
998                                                                         href="/pdb/static.do?p=explorer/viewers/king.jsp"
999                                                                         title="KiNG requires Java">KiNG</a></td>
1000                                                         </tr>
1001                                                         <tr>
1002                                                                 <td align="center"><a class="imagetable"
1003                                                                         href="/pdb/static.do?p=explorer/viewers/jmol.jsp"
1004                                                                         title="Jmol requires Java">Jmol</a></td>
1005                                                         </tr>
1006                                                         <tr>
1007                                                                 <td align="center"><a class="imagetable"
1008                                                                         href="/pdb/static.do?p=explorer/viewers/webmol.jsp"
1009                                                                         title="WebMol requires Java">WebMol</a></td>
1010                                                         </tr>
1011                                                         <tr>
1012                                                                 <td align="center"><a class="imagetable"
1013                                                                         href="/pdb/Viewers/ProteinWorkshop/protein_workshop_launch.jsp"
1014                                                                         title="ProteinWorkshop requires Java">Protein Workshop</a></td>
1015                                                         </tr>
1016                                                         <tr><td align="center"><a class="imagetable" href="/pdb/static.do?p=Viewers/QuickPDB/quickPDBApplet.jsp" title="QuickPDB requires Java">QuickPDB</a></td></tr>
1017                                                         <tr>
1018                                                                 <td align="center"><a class="imagetable"
1019                                                                         href="/pdb/explore/images.do?structureId=1CY0">All
1020                                                                 Images</a></td>
1021                                                         </tr>
1022
1023
1024                                                         <!--
1025                                 <tr>
1026                                         <td class="it_item">Web Start&nbsp;&nbsp;</td>
1027                                         <td><a class="imagetable" href="/pdb/Viewers/king.jsp" title="King requires Java">KiNG</a></td>
1028                                         <td><a class="imagetable" href="/pdb/Viewers/jmol.jsp" title="Jmol requires Java">Jmol</a></td>
1029                                         <td><a class="imagetable" href="/pdb/Viewers/webmol.jsp" title="WebMol requires Java">WebMol</a></td>
1030                                 </tr>
1031                                 -->
1032                                                 </table>
1033                                                 </td>
1034                                         </tr>
1035
1036
1037                                 </table>
1038                                 </td>
1039
1040                         </tr>
1041                 </table>
1042                 </td>
1043         </tr>
1044 </table>
1045
1046 </form>
1047
1048                   </td>
1049                 </tr>
1050           </tbody>
1051         </table>
1052
1053
1054         <table border="0" cellpadding="4" cellspacing="4" width="100%">
1055                                         <tr class="se_back2">
1056                                                 <td width="15%" align="right" valign="top" class="se_item">
1057                                                         Source
1058                                                  <form action="/pdb/search/smartSubquery.do" method="get" name="srcQueryForm">
1059                                                         <!--  searchSource(natSci, natCommon, genSci, genCommon) -->
1060                                                          <input type="hidden" name="smartSearchSubtype" value="CifFieldsQuery">
1061                                                          <input type="hidden" name="entity_src_nat.pdbx_organism_scientific.value" value="">
1062                                                          <input type="hidden" name="entity_src_nat.common_name.value" value="">
1063                                                          <input type="hidden" name="entity_src_gen.pdbx_gene_src_scientific_name.value" value="">
1064                                                          <input type="hidden" name="entity_src_gen.gene_src_common_name.value" value="">
1065                                                 </form>
1066                                                 </td>
1067                                                 <td width="85%" class="se_datalarge2" valign="top">
1068                
1069                         <span class="se_subitem">Polymer:</span>
1070                                         &nbsp;1 &nbsp;&nbsp;
1071                                         <span class="se_subitem">Scientific Name:</span>
1072                                         <span class="qrb_value">
1073                                           <a class="sePrimarycitations" onmouseover="this.className='sePrimarycitationsHover';" onmouseout="this.className='sePrimarycitations';" onclick="searchSource('', '', 'ESCHERICHIA COLI', '');">
1074                                             &nbsp;Escherichia coli&nbsp;&nbsp;
1075                                           </a>
1076                                         </span>
1077                                         <a class="se_datalarge" href="http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=562" target="resource_window"><img src="/pdbstatic/explorer/images/taxonomy_icon.gif" BORDER="0" ALT="Go to NCBI Taxonomy entry" TITLE="Go to NCBI Taxonomy entry"></a>
1078                
1079                                         <span class="se_subitem">Common Name:</span>
1080                                         <span class="qrb_value">
1081                                           <a class="sePrimarycitations" onmouseover="this.className='sePrimarycitationsHover';" onmouseout="this.className='sePrimarycitations';" onclick="searchSource('', '', '', 'BACTERIA');">
1082                                             &nbsp;Bacteria&nbsp;&nbsp;
1083                                           </a>
1084                                         </span>
1085                
1086                                         <span class="se_subitem">Expression system:</span>
1087                                         <span class="qrb_value">
1088                                           <a class="sePrimarycitations" onmouseover="this.className='sePrimarycitationsHover';" onmouseout="this.className='sePrimarycitations';" onclick="searchSource('', '', 'ESCHERICHIA COLI', '');">
1089                                             &nbsp;Escherichia coli&nbsp;&nbsp;
1090                                           </a>
1091                                         </span>
1092
1093        
1094            <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="Source">
1095                                         </td>
1096                                 </tr>
1097                                        
1098                                 <tr class="se_back1">
1099                                         <td width="15%" align="right" class="se_item">
1100                                                 Related PDB Entries
1101                                         </td>
1102                                         <td width="85%" class="se_datalarge1">
1103                                         <table border="0" cellpadding="1" cellspacing="1" width="100%">
1104                                                 <tr>
1105                                                         <td width="20%">
1106                                                                 <span class="se_subitem">Id</span>
1107                                                         </td>
1108                                                         <td width="80%">
1109                                                                 <span class="se_subitem">Details</span>
1110                                                         </td>
1111                                                 </tr>
1112                  
1113                                 <tr>
1114                                          <td width="15%" class="se_datalarge1">
1115                                                 <a class="se_datalarge" href="/pdb/explore.do?structureId=1CYY" >1CYY</a>&nbsp;
1116                                          </td>
1117                                          <td width="65%" class="se_datalarge1">
1118                                                  Crystal structure of the 30 kDa fragment of E. coli DNA topoisomerase I. Hexagonal form&nbsp;
1119                                          </td>
1120                                  </tr>
1121                
1122                                 <tr>
1123                                          <td width="15%" class="se_datalarge1">
1124                                                 <a class="se_datalarge" href="/pdb/explore.do?structureId=1CY9" >1CY9</a>&nbsp;
1125                                          </td>
1126                                          <td width="65%" class="se_datalarge1">
1127                                                  Crystal structure of the 30 kDa fragment of E. coli DNA topoisomerase I. Monoclinic form&nbsp;
1128                                          </td>
1129                                  </tr>
1130                
1131                                 <tr>
1132                                          <td width="15%" class="se_datalarge1">
1133                                                 <a class="se_datalarge" href="/pdb/explore.do?structureId=1CY8" >1CY8</a>&nbsp;
1134                                          </td>
1135                                          <td width="65%" class="se_datalarge1">
1136                                                  Complex of E.coli DNA topoisomerase I with 5'-thymidine monophosphate and 3'-thymidine monophosphate&nbsp;
1137                                          </td>
1138                                  </tr>
1139                
1140                                 <tr>
1141                                          <td width="15%" class="se_datalarge1">
1142                                                 <a class="se_datalarge" href="/pdb/explore.do?structureId=1CY7" >1CY7</a>&nbsp;
1143                                          </td>
1144                                          <td width="65%" class="se_datalarge1">
1145                                                  Complex of E.coli DNA topoisomerase I with 5'-thymidine monophosphate&nbsp;
1146                                          </td>
1147                                  </tr>
1148                
1149                                 <tr>
1150                                          <td width="15%" class="se_datalarge1">
1151                                                 <a class="se_datalarge" href="/pdb/explore.do?structureId=1CY6" >1CY6</a>&nbsp;
1152                                          </td>
1153                                          <td width="65%" class="se_datalarge1">
1154                                                  Complex of E.coli DNA topoisomerase I with 3'-thymidine monophosphate&nbsp;
1155                                          </td>
1156                                  </tr>
1157                
1158                                 <tr>
1159                                          <td width="15%" class="se_datalarge1">
1160                                                 <a class="se_datalarge" href="/pdb/explore.do?structureId=1CY4" >1CY4</a>&nbsp;
1161                                          </td>
1162                                          <td width="65%" class="se_datalarge1">
1163                                                  Complex of E.coli DNA topoisomerase I with 5'pTpTpTp3'&nbsp;
1164                                          </td>
1165                                  </tr>
1166                
1167                                 <tr>
1168                                          <td width="15%" class="se_datalarge1">
1169                                                 <a class="se_datalarge" href="/pdb/explore.do?structureId=1CY2" >1CY2</a>&nbsp;
1170                                          </td>
1171                                          <td width="65%" class="se_datalarge1">
1172                                                  Complex of E.coli DNA topoisomerase I with TpTpTp3'&nbsp;
1173                                          </td>
1174                                  </tr>
1175                
1176                                 <tr>
1177                                          <td width="15%" class="se_datalarge1">
1178                                                 <a class="se_datalarge" href="/pdb/explore.do?structureId=1CY1" >1CY1</a>&nbsp;
1179                                          </td>
1180                                          <td width="65%" class="se_datalarge1">
1181                                                  COMPLEX OF E.COLI DNA TOPOISOMERASE I WITH 5'pTpTpT&nbsp;
1182                                          </td>
1183                                  </tr>
1184                                
1185                          </table>
1186                         </td>
1187                 </tr>                           
1188                        
1189         <!-- Display one or more non - polymers such as ligands here -->
1190                
1191                 <tr class="se_back2">
1192                         <td width="15%" align="right" valign="top" class="se_item">
1193                                 Chemical Component&nbsp;&nbsp;
1194                         </td>
1195                         <td width="85%" valign="top">
1196                                 <table border="0" cellpadding="1" cellspacing="1"  width="100%" >
1197                                  <tr class="se_subitem2">
1198                                          <td width="10%">
1199                                                  Identifier
1200                                          </td>
1201                                          <td width="35%">
1202                                                  Name
1203                                          </td>
1204                                          <td width="25%">
1205                                                  Formula
1206                                          </td>
1207                                          <td width="20%">
1208                                                  Drug Similarity
1209                                          </td>
1210                                          <td width="20%">
1211                                                  Ligand Structure
1212                                          </td>
1213                                          
1214                                          <td width="20%">
1215                                                  Ligand Interaction
1216                                          </td>
1217                                          
1218                                          <td>
1219                                                 <form action="/pdb/search/smartSubquery.do" method="get" name="ligandQueryForm">
1220                                                  <input type="hidden" name="smartSearchSubtype" value="LigandIdQuery">
1221                                                  <input type="hidden" name="display" value="true">
1222                                                         <input type="hidden" name="chem_comp.id.value" value="">
1223                                                         <input type="hidden" name="chem_comp.id.comparator" value="=">
1224                                                 </form>
1225                                          </td>
1226                                  </tr>
1227                                  
1228                  
1229                                  <tr>
1230                                          <td width="5%" class="se_datalarge2" valign="middle">
1231                                                         <a class="se_datalarge" onclick="searchLigand('A3P');" onmouseover="this.className='se_datalargeHover'" onmouseout="this.className='se_datalarge'">A3P</a>
1232                                             <img src="/pdbstatic/explorer/images/spacer.gif" alt="" name="NonPolymerEntities">&nbsp;
1233                                          </td>
1234                                          <td width="30%" class="se_datalarge2">
1235                                                 ADENOSINE-3'-5'-DIPHOSPHATE&nbsp;
1236                                          </td>
1237                                          <td width="20%" class="se_formula2">
1238                                                 C<sub>1</sub><sub>0</sub> H<sub>1</sub><sub>5</sub> N<sub>5</sub> O<sub>1</sub><sub>0</sub> P<sub>2</sub> &nbsp;
1239                                          </td>
1240                                          <td width="10%" class="se_datalarge2">
1241                                                  <a class="se_datasmall" href="http://bioinformatics.charite.de/superligands/drug_similarity.php?hetero=A3P" target=resource_window>
1242                                                      [ View ]
1243                                                  </a>
1244                                          </td>
1245                                          <td width="15%" class="se_datalarge2">
1246                                                  <a class="se_datasmall" href="/pdb/explore/marvin.do?handler=structureExplorer&amp;hetId=A3P&amp;sid=1CY0">
1247                                                      [ View ]
1248                                                  </a>
1249                                          </td>
1250                                          
1251                                          <td width="20%" class="se_datalarge2">
1252                                                  <a class="se_datasmall" href="/pdb/explore/ligand3d.jnlp?hetId=A3P">
1253                                                      [ View ]
1254                                                  </a>
1255                                          </td>
1256                                          
1257                                  </tr>
1258                        
1259                        
1260                                 </table>
1261                          </td>
1262                     </tr>
1263    
1264      
1265
1266  <tr class="se_back1">
1267                                         <td width="15%" align="right" valign="top" class="se_item">
1268                                                 <span class="external_ref">SCOP Classification <font size="-2">(version&nbsp;1.69)</font></span>
1269                                         </td>
1270                                         <td width="85%" valign="top">
1271                                     <table border="0" cellpadding="1" cellspacing="1"  width="100%" >
1272                                              <tr class="se_subitem1">
1273                                                     <td width="12%">
1274                                                            Domain Info
1275                                                    </td>
1276                                                     <td width="13%">
1277                                                            Class
1278                                                    </td>
1279                                                     <td width="15%">
1280                                                            Fold
1281                                                    </td>
1282                                                    <td width="15%">
1283                                                           Superfamily
1284                                                    </td>
1285                                                    <td width="15%">
1286                                                           Family
1287                                                    </td>
1288                                                    <td width="15%">
1289                                                           Domain
1290                                                    </td>
1291                                                    <td width="15%">
1292                                                          Species
1293                                                     </td>
1294                                          </tr>
1295
1296                                          <tr>
1297                                         <td width="12%" class="se_datasmall1">
1298                                                 d1cy0a_
1299                                         </td>
1300
1301                                                           <td width="13%" class="se_datasmall1">
1302                                                                           <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=11&amp;n=56572"> Multi-domain proteins (alpha and beta)</a>
1303                                                           </td>
1304
1305                                                           <td width="15%" class="se_datalarge1">
1306                                                                           <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=11&amp;n=56711"> Prokaryotic type I DNA topoisomerase</a>
1307                                                           </td>
1308
1309                                                           <td width="15%" class="se_datalarge1">
1310                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=11&amp;n=56712"> Prokaryotic type I DNA topoisomerase</a>
1311                                                           </td>
1312
1313                                                           <td width="15%" class="se_datalarge1">
1314                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=11&amp;n=56713"> Prokaryotic type I DNA topoisomerase</a>
1315                                                           </td>
1316
1317                                                           <td width="15%" class="se_datalarge1">
1318                                                                           <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=11&amp;n=56714"> DNA topoisomerase I, 67K N-terminal domain</a>
1319                                                           </td>
1320
1321                                                           <td width="15%" class="se_datalarge1">
1322                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=11&amp;n=56715"> Escherichia coli</a>
1323                                                           </td>
1324                                            </tr>
1325
1326                         </table>
1327                       </td>
1328                         </tr>         
1329
1330
1331
1332                                 <tr class="se_back2">
1333                                         <td width="15%" align="right" valign="top" class="se_item">
1334                                                 <span class="external_ref">CATH Classification <font size="-2">(version v2.6.0)</font></span>
1335                                         </td>
1336                                         <td width="85%" valign="top">
1337                                             <table border="0" cellpadding="1" cellspacing="1"  width="100%" >
1338                                              <tr class="se_subitem2">
1339                                                             <td width="20%">
1340                                                                    Domain
1341                                                            </td>
1342                                                            <td width="20%">
1343                                                                   Class
1344                                                            </td>
1345                                                            <td width="20%">
1346                                                                   Architecture
1347                                                            </td>
1348                                                            <td width="20%">
1349                                                                   Topology
1350                                                            </td>
1351                                                            <td width="20%">
1352                                                                  Homology
1353                                                             </td>
1354                                               </tr>
1355
1356                                           <tr>
1357                                           <td width="20%" class="se_datasmall2">
1358                                                                            1cy0A1
1359                                                           </td>
1360
1361                                                           <td width="20%" class="se_datasmall2">
1362                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=1274">Alpha Beta</a>
1363                                                           </td>
1364
1365                                                           <td width="20%" class="se_datasmall2">
1366                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=1842">3-Layer(aba) Sandwich</a>
1367                                                           </td>
1368        
1369                                                           <td width="20%" class="se_datasmall2">
1370                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=1978">Rossmann fold</a>
1371                                                           </td>
1372
1373                                                           <td width="20%" class="se_datasmall2">
1374                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=2009">ISOMERASE</a>
1375                                                           </td>
1376                                                 </tr>       
1377
1378                                           <tr>
1379                                           <td width="20%" class="se_datasmall2">
1380                                                                            1cy0A2
1381                                                           </td>
1382
1383                                                           <td width="20%" class="se_datasmall2">
1384                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=1">Mainly Alpha</a>
1385                                                           </td>
1386
1387                                                           <td width="20%" class="se_datasmall2">
1388                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=2">Orthogonal Bundle</a>
1389                                                           </td>
1390        
1391                                                           <td width="20%" class="se_datasmall2">
1392                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=352">Topoisomerase I; domain 2</a>
1393                                                           </td>
1394
1395                                                           <td width="20%" class="se_datasmall2">
1396                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=353">Topoisomerase I, domain 2</a>
1397                                                           </td>
1398                                                 </tr>       
1399
1400                                           <tr>
1401                                           <td width="20%" class="se_datasmall2">
1402                                                                            1cy0A3
1403                                                           </td>
1404
1405                                                           <td width="20%" class="se_datasmall2">
1406                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=762">Mainly Beta</a>
1407                                                           </td>
1408
1409                                                           <td width="20%" class="se_datasmall2">
1410                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=1229">Distorted Sandwich</a>
1411                                                           </td>
1412        
1413                                                           <td width="20%" class="se_datasmall2">
1414                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=1248">Topoisomerase I; domain 3</a>
1415                                                           </td>
1416
1417                                                           <td width="20%" class="se_datasmall2">
1418                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=1249">Topoisomerase I, domain 3</a>
1419                                                           </td>
1420                                                 </tr>       
1421
1422                                           <tr>
1423                                           <td width="20%" class="se_datasmall2">
1424                                                                            1cy0A4
1425                                                           </td>
1426
1427                                                           <td width="20%" class="se_datasmall2">
1428                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=1">Mainly Alpha</a>
1429                                                           </td>
1430
1431                                                           <td width="20%" class="se_datasmall2">
1432                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=2">Orthogonal Bundle</a>
1433                                                           </td>
1434        
1435                                                           <td width="20%" class="se_datasmall2">
1436                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=307">Topoisomerase I; domain 4</a>
1437                                                           </td>
1438
1439                                                           <td width="20%" class="se_datasmall2">
1440                                                                            <a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=12&amp;n=308">Topoisomerase I, domain 4</a>
1441                                                           </td>
1442                                                 </tr>       
1443      
1444                         </table>
1445                       </td>
1446                         </tr>                   
1447
1448                 <tr class="se_back1">
1449                                         <td width="15%" align="right" valign="top" class="se_item">
1450                                                 <span class="external_ref">PFAM Classification </span>
1451                                         </td>
1452                                         <td width="85%" valign="top">
1453                                             <table border="0" cellpadding="1" cellspacing="1"  width="100%" >
1454                                              <tr class="se_subitem1">
1455                                                             <td width="5%">
1456                                                                    Chain
1457                                                            </td>
1458                                                            <td width="20%">
1459                                                                   PFAM Accession
1460                                                            </td>
1461                                                            <td width="15%">
1462                                                                   PFAM ID
1463                                                            </td>
1464                                                            <td width="25%">
1465                                                                   Description
1466                                                            </td>
1467                                                            <td width="15%">
1468                                                                  Type
1469                                                             </td>
1470                                                            <td width="20%">
1471                                                                  Clan ID                                                           
1472                                               </tr>
1473
1474
1475                                                 <tr>
1476                                           <td width="5%" class="se_datasmall1">
1477                                                                            A
1478                                                           </td>
1479                                                           <td width="20%" class="se_datasmall1">
1480                                                                            <a class="se_datalarge" href="http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?PF01131" target=resource_window>PF01131</a>
1481                                                           </td>
1482                                                           <td width="15%" class="se_datasmall1">
1483                                                                            Topoisom_bac
1484                                                           </td>
1485                                                           <td width="25%" class="se_datasmall1">
1486                                                                            DNA topoisomerase
1487                                                           </td>
1488                                                           <td width="15%" class="se_datasmall1">
1489                                                                            Family
1490                                                           </td>
1491                                                           <td width="20%" class="se_datasmall1" title="Clan Description: n/a">
1492                                                                            n/a</a>
1493                                                           </td>
1494                                                 </tr>       
1495
1496                                                 <tr>
1497                                           <td width="5%" class="se_datasmall1">
1498                                                                            A
1499                                                           </td>
1500                                                           <td width="20%" class="se_datasmall1">
1501                                                                            <a class="se_datalarge" href="http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?PF01751" target=resource_window>PF01751</a>
1502                                                           </td>
1503                                                           <td width="15%" class="se_datasmall1">
1504                                                                            Toprim
1505                                                           </td>
1506                                                           <td width="25%" class="se_datasmall1">
1507                                                                            Toprim domain
1508                                                           </td>
1509                                                           <td width="15%" class="se_datasmall1">
1510                                                                            Family
1511                                                           </td>
1512                                                           <td width="20%" class="se_datasmall1" title="Clan Description: n/a">
1513                                                                            n/a</a>
1514                                                           </td>
1515                                                 </tr>       
1516
1517                                         </table>
1518                       </td>
1519                         </tr>             
1520
1521
1522                                   <tr class="se_back2">
1523                                         <td width="15%" align="right" valign="top" class="se_item">
1524                                                 <span class="external_ref">GO Terms</span>
1525                                         </td>
1526                                         <td width="85%" valign="top">
1527                                     <table border="0" cellpadding="1" cellspacing="1"  width="100%" >
1528                                      <tr class="se_subitem1">
1529                                             <td width="25%">
1530                                                         Polymer
1531                                            </td>
1532                                            <td width="25%">
1533                                                    Molecular Function
1534                                            </td>
1535                                            <td width="25%">
1536                                                    Biological Process
1537                                            </td>
1538                                            <td width="25%">
1539                                                    Cellular Component
1540                                             </td>
1541                                   </tr>
1542                                  
1543
1544                 <tr>
1545                                          <td width="25%" class="se_datasmall1">
1546                                                         DNA TOPOISOMERASE I (1CY0:A)
1547                                          </td>
1548                                          <td width="25%" class="se_datasmall1">
1549                                                <ul>
1550
1551                          <li><a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=6&amp;n=3676">nucleic acid binding</a>
1552
1553                          <li><a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=6&amp;n=3677">DNA binding</a>
1554
1555                          <li><a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=6&amp;n=3916">DNA topoisomerase activity</a>
1556
1557                          <li><a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=6&amp;n=3917">DNA topoisomerase type I activity</a>
1558
1559                       </ul>
1560                    </td>
1561                    <td width="25%" class="se_datasmall1">
1562                                                <ul>
1563
1564                            <li><a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=4&amp;n=6265">DNA topological change</a>
1565
1566                            <li><a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=4&amp;n=6268">DNA unwinding during replication</a>
1567
1568                            <li><a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=4&amp;n=6304">DNA modification</a>
1569
1570                       </ul>
1571                    </td>
1572                     <td width="25%" class="se_datasmall1">
1573                                                <ul>
1574
1575                            <li><a class="se_datasmall" href="/pdb/search/smartSubquery.do?smartSearchSubtype=TreeQuery&t=5&amp;n=5694">chromosome</a>
1576
1577                       </ul>
1578                    </td>
1579                 </tr>
1580
1581                         </table>
1582                         </td>
1583                   </tr>
1584                   <tr>
1585                           <td>
1586                           <!-- Form is setup for the PDF Creator to grab the data -->
1587                                 <FORM NAME="PDFCREATOR" ACTION="/pdb/sepdf" method="post" target="sepdfwin">
1588                                         <input type="hidden" name="strId" value="1CY0">
1589                                         <input type="hidden" name="strTitle" value="COMPLEX OF E.COLI DNA TOPOISOMERASE I WITH 3'-5'-ADENOSINE DIPHOSPHATE">
1590                                         <input type="hidden" name="strAuthors" value="Feinberg, H., Changela, A., Mondragon, A.">
1591                                         <input type="hidden" name="strPrimaryCitations" value="Feinberg, H., Changela, A., Mondragon, A.">
1592                                         <input type="hidden" name="strCitationTitle" value="Protein-nucleotide interactions in E. coli DNA topoisomerase I.">
1593                                         <input type="hidden" name="strJournalAbbrev" value="Nat.Struct.Biol.">
1594                                         <input type="hidden" name="strJounralVolume" value="v6">
1595                                         <input type="hidden" name="strJournalPage" value="961-968">
1596                                         <input type="hidden" name="strJournalYear" value="1999">
1597                                         <input type="hidden" name="strDeposition" value="1999-08-31">
1598                                         <input type="hidden" name="strRelease" value="2000-03-08">
1599                                         <input type="hidden" name="strExpMethod" value="X-RAY DIFFRACTION">
1600                                         <input type="hidden" name="strResolution" value="2.450000047683716">
1601                                         <input type="hidden" name="strRValue" value="0.2199999988079071">
1602                                         <input type="hidden" name="strRValueType" value="(obs.)">
1603                                         <input type="hidden" name="strRFree" value="0.27399998903274536">
1604
1605                                                                                
1606                                         <input type="hidden" name="strSpaceGroup" value="P 21 21 21">
1607                                        
1608                                        
1609                                        
1610                                         <input type="hidden" name="strLengthA" value="63.040000915527344">
1611                                        
1612                                        
1613                                        
1614                                        
1615                                         <input type="hidden" name="strLengthB" value="73.30999755859375">                                       
1616                                                                                
1617
1618                                        
1619                                        
1620                                         <input type="hidden" name="strLengthC" value="134.4600067138672">                                       
1621                                                                                
1622                                        
1623                                        
1624                                         <input type="hidden" name="strAngleA" value="90.0">                                     
1625                                                                                
1626                                        
1627                                        
1628                                         <input type="hidden" name="strAngleB" value="90.0">                                     
1629                                                                                
1630                                        
1631                                        
1632                                         <input type="hidden" name="strAngleC" value="90.0">                                     
1633                                                        
1634                                        
1635                                                                
1636                                         <input type="hidden" name="strMolDesc" value="">
1637                                         <input type="hidden" name="strKeywords" value="ISOMERASE">
1638                                         <input type="hidden" name="strSourceLine" value="Polymer: 1, Scientific Name: ESCHERICHIA COLI
1639 Common Name: BACTERIA     Expression System: ESCHERICHIA COLI">
1640                                         <input type="hidden" name="strChemCompIdList" value="A3P,">
1641                                         <input type="hidden" name="strChemCompNameList" value="ADENOSINE-3'-5'-DIPHOSPHATE,">
1642                                         <input type="hidden" name="strChemCompFormulaList" value="C10 H15 N5 O10 P2,">
1643                                         <input type="hidden" name="strChemCompListLen" value="1">
1644                                         <input type="hidden" name="strScopFoldDesc" value="Prokaryotic type I DNA topoisomerase">
1645                                         <input type="hidden" name="strScopSFDesc" value="Prokaryotic type I DNA topoisomerase">
1646                                         <input type="hidden" name="strScopFamDesc" value="Prokaryotic type I DNA topoisomerase">
1647                                         <input type="hidden" name="strScopDomDesc" value="DNA topoisomerase I, 67K N-terminal domain">
1648                                         <input type="hidden" name="strScopSpeciesDesc" value="Escherichia coli">
1649                                         <input type="hidden" name="strCathDomainList" value="1cy0A1,1cy0A2,1cy0A3,1cy0A4,">
1650                                         <input type="hidden" name="strCathClassList" value="Alpha Beta,Mainly Alpha,Mainly Beta,Mainly Alpha,">
1651                                         <input type="hidden" name="strCathArchitectureList" value="3-Layer(aba) Sandwich,Orthogonal Bundle,Distorted Sandwich,Orthogonal Bundle,">
1652                                         <input type="hidden" name="strCathTopologyList" value="Rossmann fold,Topoisomerase I; domain 2,Topoisomerase I; domain 3,Topoisomerase I; domain 4,">
1653                                         <input type="hidden" name="strCathHomologyList" value="ISOMERASE,Topoisomerase I, domain 2,Topoisomerase I, domain 3,Topoisomerase I, domain 4,">
1654                                         <input type="hidden" name="strCathListLen" value="4">
1655                                         <input type="hidden" name="strGoMoleculeList" value="DNA TOPOISOMERASE I*,*">                                   
1656                                         <input type="hidden" name="strGoMolecularFunctionList" value="nucleic acid binding,DNA binding,DNA topoisomerase activity,DNA topoisomerase type I activity,||DNA topological change,DNA unwinding during replication,DNA modification,chromosome,">                                   
1657                                         <input type="hidden" name="strGoBiologicalProcessList" value="||">                                     
1658                                         <input type="hidden" name="strGoCellularComponentList" value="||">                                     
1659                                         <input type="hidden" name="strGoListLen" value="1">                                     
1660
1661                                        
1662                                        
1663                                        
1664                                         <!--a href="#" onClick="javascript:document.PDFCREATOR.submit();">|PDF|</a-->
1665                                 </form>
1666                
1667                           </td>
1668                   </tr>
1669         </table>
1670
1671
1672 </td>
1673    </tr>
1674    <tr>
1675     <td valign="top" class="maintd"> </td>
1676    </tr>
1677   </table>
1678
1679   </td>
1680  </tr>  <!-- END MAIN RIGHT -->
1681
1682
1683  
1684  <tr>
1685   <td colspan="3">   
1686
1687  
1688
1689 <table width="100%" border="0" cellpadding="0" cellspacing="0">
1690  <tr>  <!-- BEGIN BOTTOM BORDER -->
1691   <td class="foothomelight"><img src="/pdbstatic/images/spacer.gif" alt="" height="15" border="0"></td>
1692   <td class="foothomelight" colspan="2"></td>
1693  </tr>
1694  <tr>
1695   <td class="foothomedark1" colspan="3">
1696     <a class="foothomedark1" href="http://home.rcsb.org/" target="_blank"><i>&copy; RCSB Protein Data Bank &nbsp;&nbsp;</i></a>
1697   </td>
1698  </tr>  <!-- END BOTTOM BORDER -->
1699
1700 <tr>
1701  <td colspan="3"><br></td>
1702 </tr>
1703 </table>
1704
1705   </td>
1706  </tr>
1707
1708 </tbody>
1709 </table>
1710
1711 <script type="text/javascript" language="JavaScript" src="/pdbstatic/common/lib/wz_tooltip.js"></script>
1712 <script type="text/javascript" language=javascript src="/pdbstatic/common/lib/awstats_misc_tracker.js"></script>
1713 <script type="text/javascript" language="JavaScript">
1714 var htmlHttpLoader = new RemoteFileLoader('htmlHttpLoader');
1715 // Props to Simon Willison:
1716 // http://simon.incutio.com/archive/2004/05/26/addLoadEvent
1717 var oldhandler = window.onload;
1718 window.onload = (typeof oldhandler == "function")
1719     ? function() { oldhandler(); initExpandableLists(); } : initExpandableLists;
1720 </script>
1721 </body>
1722 </html>
hpricot-0.8.6/test/files/boingboing.html0000644000175000017500000037273611710073440017656 0ustar boutilboutil Boing Boing: A Directory of Wonderful Things
Boing Boing

Wednesday, June 14, 2006

Slipstream Science Fiction anthology defies genre conventions

James Patrick Kelly and John Kessel gave a great interview to Sci Fi Weekly about their new anthology, Feeling Very Strange: The Slipstream Anthology. The book has a top-notch table-of-contents, stories that defy genre conventions and make your head spin in a good way.
We make the point in our introduction that slipstream isn't really a genre at the moment and may never be one. What it is, in our opinion, is a literary effect--in the same way that horror or comedy are literary effects achieved by many different kinds of dissimilar stories. What is that effect? We borrowed the term cognitive dissonance from the psychologists. When we are presented with two contradictory cognitions--impressions, feelings, beliefs--we experience cognitive dissonance, a kind of psychic discomfort that we normally try to ease by discounting one of the cognitions as false or illusory and promoting the other to reality. But in some cases we aren't well served by this convenient sorting out.

We think that what slipstream stories do is to embrace cognitive dissonance. F. Scott Fitzgerald once said that "The test of a first-rate intelligence is the ability to hold two opposing ideas in mind at the same time and still retain the ability to function." We believe that such an ability is necessary to cope with life in the 21st century and that stories that ask us to exercise that ability are an expression of the zeitgeist. Do you really need a definitive answer as to whether an electron is a wave or a particle? Why? Maybe it's time to make room for uncertainty in contemporary fiction, even if the stories do make you feel very strange. Slipstream may use metafictional techniques to estrange us from consensus reality, they may rewrite history, they may mash up different styles or genres. But that's the point, as we see it. Slipstream has no rules, it has only results.

Link (via Beyond the Beyond)

posted by Cory Doctorow at 03:43:28 AM permalink | blogs' comments

College kids reportedly taking more smart drugs

High-achieving college kids are reportedly dipping into "brain-steroids" -- drugs like Ritalin and Provigil, which focus attention. No one really knows how widespread this practice is, since it's uncommon for anyone to get busted for peddling smart drugs, and the side-effects of "abuse" are minimal.

This strikes me as the canonical cognitive liberty fight: why shouldn't you be allowed to make an informed decision about what state of mind you'd like to be in? Why will the law allow people to kill brain and liver cells with stupefying booze, but not smart drugs?

"What was a surprise, though, was the alarming rate of senior business majors who have used" the drugs, he writes. Almost 90 percent reported at least occasional use of "smart pills" at crunch times such as final exams, including Adderall, Ritalin, Strattera and others. Of those, three-quarters did not have a legitimate prescription, obtaining the pills from friends. "We were shocked," Salantrie writes. He says that in his report, he was "attempting to bring to light the secondary market for Adderall" specifically because "most of the university is not aware" of its extent, he says.
Link (via Futurismic)

posted by Cory Doctorow at 03:33:51 AM permalink | blogs' comments

UFO sighting picture photoshopping contest

The next Worth1000 photoshopping contest challenges artists to fake UFO-sightings photos. The quality of entries here is a little uneven, but the best of the lot are real gems. Link

posted by Cory Doctorow at 03:26:59 AM permalink | blogs' comments

Rube Goldberg machine built out of sticks and stones

There's a feature on today's Make video podcast about a giant, elaborate Rube Goldberg machine assembled out of sticks and stones in a forest. The video features some jaw-dropping, Mousetrap-style action, and the use of found forest-floor materials makes it all the more Wile E Coyote. The video features tips on setting up your own woodsy contraption. Link

posted by Cory Doctorow at 02:59:42 AM permalink | blogs' comments

Lampooning the American dismissal of Gitmo suicides

Fafblog today features a scathing, brilliant satirical look at the US characterization of the Guantanamo Bay suicides as an attack on America. Fafblog is consistently the best political satire/commentary on the net, the Web equivalent of Jon Stewart and Stephen Colbert, and they're finally back after a too-long hiatus. The characterization of the Gitmo suicides as an act of terrorism is so ugly and disingenuous that it begged to be punctured. I'm thankful that Fafblog is back to perform that service.
Run for your lives - America is under attack! Just days ago three prisoners at Guantanamo Bay committed suicide in a savage assault on America's freedom to not care about prisoner suicides! Oh sure, the "Blame Atrocities First" crowd will tell you these prisoners were "driven to despair," that they "had no rights," that they were "held and tortured without due process or judicial oversight in a nightmarish mockery of justice." But what they won't tell you is that they only committed suicide as part of a diabolical ruse to trick the world into thinking our secret torture camp is the kind of secret torture camp that drives its prisoners to commit suicide! This fiendish attempt to slander the great American institution of the gulag is nothing less than an act of asymmetrical warfare against the United States - a noose is just a suicide bomb with a very small blast radius, people! - and when faced with a terrorist attack, America must respond. Giblets demands immediate retaliatory airstrikes on depressed Muslim torture victims throughout the mideast!

"Oh but Giblets there are dozens of innocent prisoners in Guantanamo" you say because you are a namby-pamby appeasenik who suckles at the teat of terror. Well if these Guantanamo prisoners are so innocent then what are they doing in Guantanamo? Sneaking into our secret military prisons as part of an elaborate plot to make it look like we're holding them in our secret military prisons, that's what! And once they get there they can chain themselves to the floor, break their bones on helpless guards' fists, and waterboard themselves to their heart's content to further their sinister Salafi scheme to sully the reputation of secret American torture facilities everywhere!

Link

posted by Cory Doctorow at 02:55:02 AM permalink | blogs' comments

Neil Gaiman tribute CD sneak-peek

A new Neil Gaiman tribute CD is coming out in July. One of the tracks is already available -- "Mr Punch" by Future Bible Heroes, and it's a delight. Apparently, Stephin Merritt (from Future Bible Heroes) is also doing a Lemony Snicket-inspired CD in October.
Track Listing:
1 Rasputina - Coraline
2 ThouShaltNot - When Everyone Forgets
3 Tapping The Vein - Trader Boy
4 Lunascape - Raven Star
5 Deine Lakaien - A Fish Called Prince
6 Thea Gilmore - Even Gods Do
7 Rose Berlin (feat. Curve) - Coraline
8 Schandmaul - Magda Treadgolds Märchen
9 Hungry Lucy - We Won't Go
10 Voltaire w/The Oddz - Come Sweet Death
11 Future Bible Heroes - Mr. Punch
12 Razed in Black - The Endless
13 The Cruxshadows - Wake the White Queen
14 Ego Likeness - You Better Leave the Stars Alone
15 Azam Ali - The Cold Black Key
16 Joachim Witt - Vandemar
17 Tori Amos - Sister Named Desire (New Master)
Link (Thanks, Gary!)

posted by Cory Doctorow at 02:50:59 AM permalink | blogs' comments

Block DRM license plate

Tom spotted this DC license plate, reading BLK DRM. He thinks it's an anti-DRM lobbyist's plate, which is plausible, though with the acronym soup in Washington, it could stand for just about anything. Link (Thanks, Tom)

posted by Cory Doctorow at 02:43:40 AM permalink | blogs' comments

Tuesday, June 13, 2006

Chairs upholstered with lush photos

ClothUK makes easy chairs and other soft furnishings upholstered with fabric that's printed with the lush, oversized photo of your choice. Not cheap, tho! Link (via Wonderland)

posted by Cory Doctorow at 06:21:37 PM permalink | blogs' comments

Corruptibles: Copyright's tech-fighting supervillains

EFF has just launched a new video: The Corruptibles -- the story of Copyright Supervillains who patrol the Broadcast Flag future, blowing up our free and open devices. It's a great, funny viral short, and well worth a watch. Link

posted by Cory Doctorow at 06:14:53 PM permalink | blogs' comments

WIPO meets to screw up podcasting, Barcelona, June 21

The United Nations' World Intellectual Property Organization has called a last-minute meeting on June 21 in Barcelona, out of the normal diplomatic venues to try to ram through the Broadcasting Treaty. This treaty gives broadcasters (not creators or copyright holders) the right to tie up the use of audiovisual material for 50 years after broadcasting it, even if the programs are in the public domain, Creative Commons licensed, or not copyrightable.

The Barcelona meeting brings together lots of Latin American broadcasters -- who no doubt love the idea of a new monopoly right that they get for free merely for broadcasting a work. Bringing these casters in is a way of undermining the effective opposition to the treaty that's come from countries like Brazil and Chile.

No public interest groups are on the bill to give a counterpoint (of course not -- WIPO is the kind of place where public interest groups' handouts are thrown in the toilets' trashcans).

This meeting is especially deadly, because it looks like they're trying to sneak podcasting back into the treaty, after agreeing to take it out at the last big meeting in Geneva.

The good news is, it's open to the public. If you're a digital rights activist in Barcelona -- or just someone who cares about how big corporations are taking away your rights to use works freely -- then you need to be at this meeting.

Webcasting will clearly be part of next week's discussions. That much is clear from the title of next week's event: "From the Rome Convention to Podcasting". One of the invited speakers is from Yahoo! Europe, one of the proponents of new rights for webcasters. This, despite the fact that webcasting and simulcasting were taken out of the "traditional" Broadcasting Treaty and put on the slow track last month in response to concerns expressed by the majority of WIPO member states.

The good news: unlike earlier meetings, this one is open to the public, with prior registration requested. So if you care about the proposed treaties and can get to the Barcelona meeting, this is your opportunity to stand up and be counted for the public interest.

If you’re in the U.S., please tell your Congressional representatives to hold hearings on the proposed treaties before it’s too late. And if you need a reminder about the harm that these treaties could wreak on access to knowledge and technological innovation, read Jamie Boyle’s piece in today’s Financial Times.

Link

Update: Jamie Boyle has an excellent column that explains how this treaty (which the US is fighting for) would be unconstitutional in the USA.

posted by Cory Doctorow at 06:09:34 PM permalink | blogs' comments

From the Boing Boing archives, circa 1999

I came across this funny list of "Things to Do," written by "fifth Boing Boinger" Stefan Jones, which was published on the pre-blog version of boingboing.net.
1. Get $25 worth of paper currency from one of those countries where $25 worth of currency fills up two wheel barrows. Divide it into five lots and send them, along with an incomprehensible letter, to the addresses listed in an email chain letter.

2. Build some gigantic rat traps, with wooden bases at least 2' x 3' and baited with an entire blocks of government cheese. Plant the traps, in sprung state, near a local chemical company. Wear giant rat foot shoes while doing this.

3. Get a supply of those little plastic ties used to seal hotel minibars after they are loaded with a full complement of overpriced goodies. Bring them and a supply of useful things (socks, condoms, aspirin) and strange things (McGruff the Crime Dog coloring books, bottles of Moxie, a can of Hormel Calf Brains in Milk Gravy) while travelling. Put the things in the minibar before sealing it up.

Link

posted by Mark Frauenfelder at 04:57:07 PM permalink | blogs' comments

Surreal English lessons video from Japan

Picture 3-10 Very odd video of Japanese dancing girls and salarymen uttering defensive rebuttals in English. Link (via Sharpeworld)

posted by Mark Frauenfelder at 01:44:02 PM permalink | blogs' comments

Sexed robots video

Here's a video of two adorable, autonomous "sexed robots."
 Zlab Zlabpics Sexedmaleandfemale The sexed robots are autonomous wheeled platforms fitted with nylon genital organs, respectively male and female. They are programmed to explore their environment, occasionally entering a "in heat" mode, where they will try and locate a partner in the same state. If a partner is located, the robots will attempt to mate.
Link NSFW? (via Sharpeworld)

posted by Mark Frauenfelder at 01:38:27 PM permalink | blogs' comments

Burroughs' Encyclopaedia of Astounding Facts and Useful Information, 1889

Manybooks.net, which converts Project Gutenberg titles into useful formats for reading on Palm devices, iPods, and ebook readers, recently made available a fantastic compendium called Burroughs' Encyclopaedia of Astounding Facts and Useful Information, 1889: Universal Assistant and Treasure-House of Information to be Consulted on Every Question That Arises in Everyday Life by Young and Old Alike!.

It's an amazing combination of a proto-Ripley's, a cookbook, etiquette guide, and almanac.

200606131251 WONDERS OF MINUTE WORKMANSHIP.

In the twentieth year of Queen Elizabeth, a blacksmith named Mark Scaliot, made a lock consisting of eleven pieces of iron, steel and brass, all which, together with a key to it, weighed but one grain of gold. He also made a chain of gold, consisting of forty-three links, and, having fastened this to the before-mentioned lock and key, he put the chain about the neck of a flea, which drew them all with ease. All these together, lock and key, chain and flea, weighed only one grain and a half.

Oswaldus Norhingerus, who was more famous even than Scaliot for his minute contrivances, is said to have made 1,600 dishes of turned ivory, all perfect and complete in every part, yet so small, thin and slender, that all of them were included at once in a cup turned out of a pepper-corn of the common size. Johannes Shad, of Mitelbrach, carried this wonderful work with him to Rome, and showed it to Pope Paul V., who saw and counted them all by the help of a pair of spectacles. They were so little as to be almost invisible to the eye.

Johannes Ferrarius, a Jesuit, had in his possession cannons of wood, with their carriages, wheels, and all other military furniture, all of which were also contained in a pepper-corn of the ordinary size.

Link

posted by Mark Frauenfelder at 12:57:03 PM permalink | blogs' comments

Door looks like you walk through it

200606131240 Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean. Link

posted by Mark Frauenfelder at 12:39:56 PM permalink | blogs' comments

AOL's efforts to keep you from quitting your account

Listen to this recording of a guy who called AOL to try to cancel his account and the AOL jerk who tries to keep him from canceling. Just disgusting. Link (via Digg)

posted by Mark Frauenfelder at 12:27:01 PM permalink | blogs' comments

LA's South Central Farm under police siege right now

The police have closed on South Central Farm, the largest community garden in the USA. The farms were planted after the Rodney King uprising, when the land was given to the neighborhood, and it has been reclaimed and cultivated by 350 families. The city reneged on its promise and sold the land to a developer, who has now moved on it with bulldozers and a riot squad.
The South Central Farm is currently under attack. An early morning raid began this 5-hour long eviction that is still in process. Trees are being cut down, bulldozers are leveling the families’ food, hundreds of protesters are on site rallying with tears in their eyes as the nation’s largest urban farm is destroyed before them. The L.A.P.D. is on tactical alert as fire ladders and cherry pickers are being brought in to remove the tree-sitters. The 350 families created this oasis 14 years ago in the wake of the 1992 uprising when this land was offered to the community by the then Mayor as a form of mitigation.
Link, Flickr's southcentralfarm tag (Thanks to everyone who wrote in with this link)

Update: Elan sez, "the land for the farm was originally taken from Ralph Horowitz through eminent domain with the intention of using it for a trash incinerator. When the incinerator fell through, the city was required to sell it back to the Horowitz (after a ten year period of first refusal)."

posted by Cory Doctorow at 11:48:42 AM permalink | blogs' comments

Stephen Hawking writing a kids' book

Stephen Hawking and his daughter are collaborating on a kids' novel that is "a bit like Harry Potter, but without the magic."
His daughter Lucy said their forthcoming project would be aimed at people like her own eight-year-old son.

"It is a story for children, which explains the wonders of the universe," she said.

Link (via Fark)

posted by Cory Doctorow at 11:42:15 AM permalink | blogs' comments

ScienceMatters@Berkeley, June issue

My new issue of ScienceMatters@Berkeley is online. In this issue:
 Archives Volume3 Issue21 Images Oster3
* Start Your Protein Engines

* The New New Math of String Theory

* Molecular Rules of Engagement
Link

posted by David Pescovitz at 11:42:06 AM permalink | blogs' comments

iPod dock/speakers built into bumwad dispenser

This iPod dock and speakers built into a bumwad dispenser isn't as weird as it seems at first blush -- lots of us have a radio in the bathroom; this is a way of listening to your iPod without sacrificing your limited counterspace to an electronics footprint. Link (via Popgadget)

posted by Cory Doctorow at 11:38:34 AM permalink | blogs' comments

Cory's Someone to Town shortlisted for Canada's sf book award

I'm pleased as punch to say that my novel, Someone Comes to Town, Someone Leaves Town has been shortlisted for the Sunburst, Canada's national science fiction award. The Sunburst jury honored me with the award in 2004 for my short story collection A Place So Foreign and Eight More and this is a double-helping of delight.

Someone Comes to Town... comes out in a new trade paperback edition this week, too! Link

posted by Cory Doctorow at 10:59:36 AM permalink | blogs' comments

People are happier when they're older?

A new study suggests that people may think that the happiest days of their lives are when they're young, but that belief doesn't jibe with reality. University of Michigan and VA Ann Arbor healthcare Systems researchers polled 540 adults in the 21-40 age group and 60+ age group. They rated their own happiness right now, predicted how happy they'd be in the future, and also how happy they think others are in those age groups. The results were published in the Journal of Happiness Studies, which is a delightful name for a scientific publication. From the University of Michigan Health System:
"Overall, people got it wrong, believing that most people become less happy as they age, when in fact this study and others have shown that people tend to become happier over time," says lead author Heather Lacey, Ph.D., a VA postdoctoral fellow and member of the U-M Medical School's Center for Behavioral and Decision Sciences in Medicine. "Not only do younger people believe that older people are less happy, but older people believe they and others must have been happier 'back then'. Neither belief is accurate..."

"People often believe that happiness is a matter of circumstance, that if something good happens, they will experience long-lasting happiness, or if something bad happens, they will experience long-term misery," (says co-author Peter Ubel). "But instead, people's happiness results more from their underlying emotional resources -- resources that appear to grow with age. People get better at managing life's ups and downs, and the result is that as they age, they become happier -- even though their objective circumstances, such as their health, decline."
Link

posted by David Pescovitz at 10:58:28 AM permalink | blogs' comments

The incredible sound-mimicking lyrebird

Lyrebird Here's a video clip of a male Australian lyrebird, which sings complex songs to attract mates. Lyrebirds' songs are composed of sounds they hear, including sounds from machines, such as a camera's shutter mechanism and film drive, a car alarm, and logging equipment. This bird is like a tape recorder. Link (thanks, Coop!)

posted by Mark Frauenfelder at 10:53:26 AM permalink | blogs' comments

Aymara people's "reversed" concept of time

The Aymara, an indigenous group in the Andes highlands, have a concept of time that's opposite our own spatial metaphor. A new study by cognitive scientists explains how the Aymara consider the past to be ahead and the future behind them. According to the study, this is the first documented culture that seems not to have mapped time with the properties of space "as if (the future) were in front of ego and the past in back." From UCSD:
The linguistic evidence seems, on the surface, clear: The Aymara language recruits “nayra,” the basic word for “eye,” “front” or “sight,” to mean “past” and recruits “qhipa,” the basic word for “back” or “behind,” to mean “future.” So, for example, the expression “nayra mara” – which translates in meaning to “last year” – can be literally glossed as “front year..."

The Aymara, especially the elderly who didn’t command a grammatically correct Spanish, indicated space behind themselves when speaking of the future – by thumbing or waving over their shoulders – and indicated space in front of themselves when speaking of the past – by sweeping forward with their hands and arms, close to their bodies for now or the near past and farther out, to the full extent of the arm, for ancient times. In other words, they used gestures identical to the familiar ones – only exactly in reverse.

“These findings suggest that cognition of such everyday abstractions as time is at least partly a cultural phenomenon,” (University of California, San Diego professor Rafael) Nunez said. “That we construe time on a front-back axis, treating future and past as though they were locations ahead and behind, is strongly influenced by the way we move, by our dorsoventral morphology, by our frontal binocular vision, etc. Ultimately, had we been blob-ish amoeba-like creatures, we wouldn’t have had the means to create and bring forth these concepts.

“But the Aymara counter-example makes plain that there is room for cultural variation. With the same bodies – the same neuroanatomy, neurotransmitters and all – here we have a basic concept that is utterly different,” he said.
Link

posted by David Pescovitz at 10:05:40 AM permalink | blogs' comments

Webby Awards last night, with Prince

Prince
Prince performed an acoustic number at last night's Webby Awards in NYC. Prince won a Lifetime Achievement Award. His five word acceptance speech: "Everything you think is true." Also in attendance were Robert Kahn, Gorillaz, Arianna Huffington, and dozens of other interesting folks. Rob Corddry hosted. Congrats to all the winners and our friends at the Webby Awards for what sounds like an amazing ceremony! Check Rocketboom for the edit of the evening.
Link

posted by David Pescovitz at 09:19:33 AM permalink | blogs' comments

Electrical substations disguised as houses

Toronto Hydro, the electrical authority in Toronto, has spent decades building electrical substations that are disguised as typical family houses:
In 1987, Canadian photographer Robin Collyer began documenting houses that aren't houses at all – they're architecturally-disguised electrical substations, complete with windows, blinds, and bourgeois landscaping.

"During the 1950s and 1960s," Collyer explains in a recent issue of Cabinet Magazine, "the Hydro-Electric public utilities in the metropolitan region of Toronto built structures known as 'Bungalow-Style Substations.' These stations, which have transforming and switching functions, were constructed in a manner that mimics the style and character of the different neighborhoods."

Link

posted by Cory Doctorow at 04:53:31 AM permalink | blogs' comments

Candyland board made from 100k beads

Peggy Dembicer cloned a 1978 Candyland game-board using over 100,000 novelty beads. She's documented the finished project on Flickr, with details of some of the finer work. Link (via Make Blog)

posted by Cory Doctorow at 12:10:49 AM permalink | blogs' comments

Monday, June 12, 2006

Stanford prof sues James Joyce estate for right to study Joyce

A prof at Stanford University is suing the estate of James Joyce over the estate's long practice of destroying documents vital to Joyce scholarship, and of intimidating academics and creators who want to study and extend the works of Joyce. Carol Shloss, a Joyce scholar, has worked for 15 years on a book about the ways in which the book Finnegans Wake was inspired by Joyce's mentally ill daughter. Joyce's grandson, Stephen Joyce, has allegedly destroyed documents relating to this to undermine her book.

This isn't the first time that Stephen Joyce has hurt the cause of scholarship about his grandfather. He threatened to sue the Irish Museum over its exhibition of Joyce's papers. He threatened to sue pubs in Ireland for allowing people to read aloud from Joyce's novels on Bloomsday, the celebration of Ulysses. He told symphonic composers that they couldn't put Joyce quotations in their symphonies.

Most tragically, there was a brief moment when Stephen Joyce was irrelevant. The works of James Joyce were in the public domain until the EU copyright directive extended copyright by 20 years, putting Joyce's books back into the care of his capricious grandson for decades.

There's a whole body of scholarship devoted to tracking the ways in which Stephen Joyce has made himself the enemy of academics and Joyce lovers. The best work to start with is Matthew Rimmer's Bloomsday: Copyright Estates and Cultural Festivals.

Before the book was published, publisher Farrar, Straus and Giroux removed several supporting citations from Shloss' tome to avoid a lawsuit, according to Olson. Shloss wants to post that information as an electronic appendix to answer several critics who charged that "To Dance in the Wake" was interesting, but thin on documentary evidence, Olson said.

"It's painful once you've written something ... that you think is complete and good, to have it hacked up," Olson said. "There is a desire to bring it forth in the way she originally intended."

Shloss prepared the Web site last year but never made it public because she worried about being sued, Olson said. Among the items excised from the book are quotations from "Finnegans Wake" she thinks support her thesis, as well as letters between James Joyce and his daughter, according to Olson.

Shloss wants the court to declare she's entitled to use information the estate controls under laws that allow authors to quote copyrighted works if they do it in "a scholarly transformative manner."

Link (Thanks, Vidiot!)

Update: This New Yorker article on the case is full of great color and background, and includes the fact that Larry Lessig, founder of the Creative Commons project, is arguing the case.

posted by Cory Doctorow at 10:52:01 PM permalink | blogs' comments

HOWTO turn a NES controller into a cell-phone


Diyhappy took apart a Nokia 3200 -- which had interchangeable faceplates and was thus readily uncased -- and rebuilt it inside an old Nintendo Entertainment System controller. He dremelled out holes for the buttons and the screen and voila, the NES mobile phone. Link (Thanks, Sam!)

posted by Cory Doctorow at 10:39:04 PM permalink | blogs' comments

HOWTO make cufflinks out of Ethernet connectors

Mark got invited to a fancy party and couldn't find his cufflinks, so he hacked a pair out of some Ethernet connectors and bits of wire; and thus the crimp-your-own cufflink was born. He's written up his mod in detail for others who want to follow suit. Link (Thanks, Mark!)

posted by Cory Doctorow at 10:32:58 PM permalink | blogs' comments

Inside China's iPod sweat-shops

A British paper sent a reporter to "iPod City," the plant in Longhua, China, where iPods are assembled by women who earn $50/month for working 15 hour days.

My guess is that this is no worse than the conditions in which Powerbooks, Thinkpads, Zens, Linksys routers, etc are manufactured, but Christ, this is depressing.

The Mail visited some of these factories and spoke with staff there. It reports that Foxconn's Longhua plant houses 200,000 workers, remarking: "This iPod City has a population bigger than Newcastle's."

The report claims Longhua's workers live in dormitories that house 100 people, and that visitors from the outside world are not permitted. Workers toil for 15-hours a day to make the iconic music player, the report claims. They earn £27 per month. The report reveals that the iPod nano is made in a five-storey factory (E3) that is secured by police officers.

Another factory in Suzhou, Shanghai, makes iPod shuffles. The workers are housed outside the plant, and earn £54 per month - but they must pay for their accommodation and food, "which takes up half their salaries", the report observes.

Link (Thanks, Tony!)

Update: A former Nokia employee adds, "Add Nokia phones to your list. The type label may say 'Made in Finland' (top-notch models) or 'Made in Hungary' (mid-range ones), but Nokia cellphone engines (ie. the actual hardware) are manufactured by Foxconn in Longhua, China... unless they've found a cheaper supplier. Yes, I actually worked at the plant for a few months between real jobs."

Update 2: Apple has promised to investigate the labor conditions in its iPod factories.

posted by Cory Doctorow at 10:29:26 PM permalink | blogs' comments

Dave Alvin profile

200606122126 Colin Berry's Dave Alvin piece, which ran on KQED's "California Report" a couple weeks ago, is now available online.

Colin says: "Dave's new album [West of the West] is a tribute to California songwriters, including Tom Waits, Kate Wolf, Merle Haggard, Los Lobos, and others. I hung with him in the studio and talked to him (and some of the original songwriters) during the making of it." Link

posted by Mark Frauenfelder at 09:26:09 PM permalink | blogs' comments

Tim Biskup's tiny sculpture

Iki-Happy O Iki-Sad O
Tim Biskup has created a tiny bronze sculpture to sell at his Laguna Art Museum retrospective (along with Gary Baseman).
Iki stands 2.25" tall, has dual faces, is limited to 44 signed and numbered pieces,  and comes in a letterpressed packaging. Iki will be available at the Saturday opening (6.17.06, 8 - 10 PM)  of Tim's joint retrospective Pervasion show with Gary Baseman at the Laguna Art Museum.
Link (Thanks, Scott!)

posted by Mark Frauenfelder at 09:21:26 PM permalink | blogs' comments

Covers from '60s French satirical magazine: Hara Kiri

Hara Kiri Here are a bunch of cover scans of a magazine I didn't know about until today. According to Wikipedia, Hara Kiri was created in 1960 and "in 1961 and 1966 they were temporarily banned by the French Government." Link

posted by Mark Frauenfelder at 06:57:48 PM permalink | blogs' comments

Negativland performing in LA tonight

Mark from Negativland says:
This is very late notice, but we (Negativland) want to let you and all Boingboingers know that we are playing live in LA. this Monday night, June 12th, at the Silent Movie Theater! Yes. We are. It's at 611 North Fairfax Ave. Hollywood Box office- 323-655-2520. Tickets are $22. The doors are at 9pm and the show starts at 10pm sharp!

This is a very rare appearance for us in L.A., and at a really cool and intimate venue. The show we are performing is called "It's All In Your Head FM", and we hope you can attend! It's about monotheism, but in stereo. With blindfolds handed out at the door. Really.

Link

posted by Mark Frauenfelder at 06:24:05 PM permalink | blogs' comments

Meme Therapy interviews Rudy Rucker

Meme Therapy has a long and interesting interview with one of my very favorite authors, Rudy Rucker.
MT: What aspects of writing do you enjoy the most?

200606121734 RR: I like leaving the daily world and going to another world, a world that I had a hand in designing. You’ll notice that in most of my novels, the main character in fact leaves the world where I start him out and goes to another world. Another planet, another dimension, another sheet of reality. It’s an objective correlative for what I’m doing when I leave this mundane world and go into the world of my novel.

Writing is so much work. Every part of writing a novel is hard. The planning, the sitting down and creating, the revising. I guess the most fun part is when it seems to pour out and I’m having a good day. When I’m doing that, I stop worrying for a while, I forget myself and I’m happy and proud and even exalted and amazed to see what’s coming down or going up.

More precisely, that fun part is “the narcotic moment of creative bliss.” I just heard John Malkovich deliver that phrase, playing the role of an artist/art prof in Art School Confidential. That’s very right on; the operative word is “narcotic,” it’s definitely something you get addicted to over the years. Really I go to all this trouble writing a novel day after day month after month because, in a way, I’m trying to get high. Or see God. Or make love to the Muse. Waiting for the narcotic moment of creative bliss.

Link

posted by Mark Frauenfelder at 05:35:13 PM permalink | blogs' comments

Transparent street signs

 City Of Chicago Images Images Image04 Two years ago, artist Cayetano Ferrer took photos of the scene behind several Chicago street signs and then pasted the prints on top of the signs to achieve an amazing transparency effect. (As the Wooster Collective blog points out, Amnesty International's recent ad campaign employs a similar technique. And it's also the idea behind the "Transparent Screen" trick for your computer display.)
Link to Ferrer's "City of Chicago" image gallery, Link to Amnesty International campaign (via Neatorama)

posted by David Pescovitz at 03:41:34 PM permalink | blogs' comments

Illustrations from Rabelais's Gargantua and Pantagruel

Bibliodyssey has posted two mind-blowing selections of surrealist characters from a 1565 publication called "Les Songes Drolatiques de Pantagruel, ou sont contenues plusieurs figures de l’invention de maistre François Rabelais : & derniere oeuvre d’iceluy, pour la recreation des bons esprits." While Rabelais is often credited with drawing the characters to accompany his text, they were apparently most likely drawn by François Desprez. The absurd monsters remind me of the wonderful phantasmagoric work of Jim Woodring.
 Blogger 1717 1584 1600 Rabelais-Pantegruel-By-Francois-Desprez-11-1
From Bibliodyssey:
Franciscan friar, doctor, traveller, model for the Thelemic magickal writings of Aleister Crowley, humanist, Benedictine monk, alchemist, teacher, leader of the French renaissance, heretic, Greek scholar and groundbreaking satirical writer, François Rabelais (?1483/1493-1553) issued his magnum opus 'The life of Gargantua and Pantagruel' as a five book series over 20 years up to 1564.

The books chart the humorous adventures of giants Gargantua and his son, Pantagruel in a scatological and often bawdy manner. Rabelais wrote in the epic tradition of Homer, and beyond the burlesque, there is an underlying serious examination of society, politics, education and philosophy whilst introducing 500 new words to the French language.
Link and Link

posted by David Pescovitz at 02:52:18 PM permalink | blogs' comments

Cartoonist Chris Ware on the piano

Chris Ware on piano This isn't fair. Not only is Chris Ware a supremely gifted cartoonist, he can also play ragtime piano like nobody's business. Link (via Flog!)

Reader comment: Rob DeRose says:

Chris Ware shares his love of ragtime with MacArthur Genius Grant winner Reginald Robinson. In fact Chris heard Reginald playing in the winter garden of the Chicago Central Library one day and became fast friends. Here's a story from NPR on how the two of them found an entirely new song of Scott Joplin's. (it's the fourth of four mini-segments.)

I've been a fan of Reginald ever since I saw him accompany the Squirrel Nut Zippers (in-between their first & second album) here in Chicago. If you get a chance to see him, by all means go. A nice Trib article about him. His CD on Amazon.

The Chicago Library's winter garden, where he used to practice, and where he & Chris Ware met (it's the nicest part of the building.)


posted by Mark Frauenfelder at 02:22:04 PM permalink | blogs' comments

Walt Disney's 1956 time-capsule letter to the future

The Disney company has unearthed a 1956 time-capsule containing a letter from Walt Disney to the future, on stationery bearing the legend "NO AGREEMENT WILL BE BINDING ON THIS CORPORATION UNLESS IN WRITING AND SIGNED BY AN OFFICER." Walt's letter to the future speculates about the future of entertainment and is at once profoundly wrong and profoundly right.

Walt predicts that the world will be overturned by technology, all the old order remade. At the same time, he assumes that what will come in on the tails of 1956's mass media will be...more mass media! Even though Walt himself predated truly national media, he can't conceive of the age of mass media waning and being replaced by a mass medium -- a channel like the net -- crowded with a never-ending confusion of micro-media. Walt, in other words, didn't predict the long tail.

[...O]f one thing I'm sure. People will need and demand amusement, refreshment and pleasant relaxation from daily tasks as much in your day as they have in ours and in all the generations of mankind into the remote past. What the exact nature and implementation of these mass entertainments may be, doesn't make much difference, it seems to me.

Humanity, as history informs us, changes very slowly in character and basic interests. People need to play as much as they need to toil. They never cease to be fascinated by their own powers and passions, their base or noble emotions, their faiths and struggles and triumphs against handicap -- all things that make them laugh and weep and comfort one another in love and sacrifice out of the depths of their being...

Mindful of the phenomenal discoveries and applications of science to all our activities and institutions, it seems no mere guess that public entertainment will have become machined and powered by atomic and solar energies long before you read this capsule.

The extension of radar and other as yet untapped sources of cosmic force may well have changed the entire technique of communication, in the theatre and television fields as well as in other areas of informational broadcast.

Millions of people in massive assemblies around the world may now be viewing the same staged or natural event, scanned by some incredibly potent scope, in the same amount of time. They may even be viewing presently obscured vistas on neighboring planets as one might look at neighbors across our Los Angeles Streets.

Omniscience will have drawn closer to finite senses and perceptions, for our entertainment as for our livelihood -- yours, I should say, who will read this in your 21st Century.

200K PDF Link (Thanks, Anonymous Source!)

posted by Cory Doctorow at 02:01:45 PM permalink | blogs' comments

Plans for Barney Fife statue toppled by Knotts' estate

A couple of Barney Fife fans who put down $8,000 to erect a statue of Barney Fife (played by Mick Jagger lookalike Don Knotts) in downtown Mount Airy, NC (the model for Mayberry) received a letter from CBS attorneys telling them to halt the project.
 Knottsstatue [CBS attorney] Mallory Levitt explained to [would-be statue erector Tom] Hellebrand and the Mount Airy News that although Paramount/CBS owned the rights to the character of Barney Fife, the group didn't have the authority to give permission for a likeness of Don Knotts.

"That right belongs to the Knotts estate," she said.

Levitt told Hellebrand she contacted the actor's estate and business associates of Andy Griffith, and none wanted to go through with the project.

The project website, donknottsstatue.com, has a note about the cancellation of the project: "The tears on our pillows bespeak the pain that is in our hearts." The project leaders will be selling a full-size replica of a Mayberry Squad car and a golf cart made to look like a squad car on eBay to recoup their expenses. Link (Thanks for the correct URL, Dru!)

posted by Mark Frauenfelder at 01:20:09 PM permalink | blogs' comments

Ol' Glory energy drink

 72 165796068 94C49E9C84 At the Institute for the Future this morning, my colleague Mike Love is chugging this delicious and patriotic energy drink. Their tag line: "Makes you feel better all over than anywhere else." Ummm....
Link to Ol' Glory site, Link to Mike's photo on Flickr

posted by David Pescovitz at 11:41:56 AM permalink | blogs' comments

theFLOWmarket sells consumer awareness

Flowmarket Gmo
theFLOWmarket is a supermarket-as-artwork that sells consumer awareness in the form of imaginary products like "commercial free-space," "exploitation free produce," "symptom removers," "factory farming antibiotics," "renewable energy," and "a feeling of safety." The nicely-packaged products are available for sale at prices from $5 to $20. theFLOWmarket is open for business at the Danish Design Centre in Copenhagen. Link to Flash site (Thanks, Lindsay Tiemeyer!)

posted by David Pescovitz at 11:34:06 AM permalink | blogs' comments

Pea-head smashes art gallery window, gets busted

Roq La Rue gallery Kirsten Anderson has a funny story about an idiot that smashed a window at her new gallery in Seattle, called BLVD. He might just be the stupidest brick-wielder in the known universe. Read on...
BLVD broken window [A]fter I left and Damion was closing up, some drunk mouth-breathing knuckle-dragger starts banging on the door demanding to be let in. After Damion tells him that the gallery was closed, the Moron says "I'm going to smash your door in with a brick!" Damion at the time was talking to a couple of bad asses who offered to kick the guy's ass for us and looks like we should have taken them up on it, as an hour later, the guy pulls up to the gallery in his car, double parks, pulls a brick out of his car, and smashes BLVD's door a couple times. Cool huh? No one was in the gallery -- but the guy who lives upstairs heard it and called Damion. Also -- there is a bar a few storefronts away from us and I guess the folks on the patio saw it all. But it gets better. Moron turns to go back to his car and finds he's locked himself out. Har har! So he tries to smash his own car window in with the brick, which doesn't work... so he goes into the Rendezvous to use the payphone to call a locksmith which is where he got nabbed by the cops. What a maroon. So anyway. Today is happy fun door repair day.
Link

posted by Mark Frauenfelder at 11:04:18 AM permalink | blogs' comments

Prescription stimulants on campus

In yesterday's Washington Post, Joel Garreau, author of Radical Evolution, writes about the popularity of drugs like Adderall and Provigil to increase focus and wakefulness during academically stressful times. From the article:
"I'm a varsity athlete in crew," says Katharine Malone, a George Washington University junior. "So we're pretty careful about what we put in our bodies. So among my personal friends, I'd say the use is only like 50 or 60 percent..."

For a senior project this semester, Christopher Salantrie conducted a random survey of 150 University of Delaware students at the university's Morris Library and Trabant Student Center.

"With rising competition for admissions and classes becoming harder and harder by the day, a hypothesis was made that at least half of students at the university have at one point used/experienced such 'smart drugs,' " Salantrie writes in his report. He found his hunch easy to confirm.

"What was a surprise, though, was the alarming rate of senior business majors who have used" the drugs, he writes. Almost 90 percent reported at least occasional use of "smart pills" at crunch times such as final exams, including Adderall, Ritalin, Strattera and others. Of those, three-quarters did not have a legitimate prescription, obtaining the pills from friends."
Link (Thanks, Jason Tester!)

posted by David Pescovitz at 09:51:50 AM permalink | blogs' comments

Vintage pulp covers for classic novels

Slate commissioned designers to produce six vintage pulp-fiction covers for classic novels like Moby Dick ("Primitive Pirate Passions Were a Prelude to Death!"), The Iliad ("Gore! Greeks! Glory!") and Alice in Wonderland ("One girl's drug-induced descent into dreamland debauchery"). The results are lovely. Link (Thanks, Fipi Lele!)

posted by Cory Doctorow at 08:33:24 AM permalink | blogs' comments

LotR video clip with voices replaced by foolish groans

In this youtube, "CJ" has taken a clip from the Lord of the Rings trilogy in which Frodo awakens in the Elf stronghold and greets his comrades and replaced all the voices with idiotic groaning and moaning and squealing, apparently voiced by someone named Olaf. The net effect is surprisingly funny! Link (Thanks, Alice!)

posted by Cory Doctorow at 04:36:17 AM permalink | blogs' comments

William Gibson blogging fiction excerpts

Since June 1, William Gibson has been posting irregular chunks of prose to his blog, stuff that appears to be excerpts from a novel-in-progress. It's fascinating stuff, little vignettes that hint at a really exciting bigger picture.
Vianca sat cross-legged on Tito’s floor, wearing a disposable hairnet and white knit cotton gloves, with his Sony plasma screen across her knees, going over it with an Armor All wipe. When she’d wiped it completely down, it would go back into its factory packaging, which in turn would be wiped down. Tito, in his own hairnet and gloves, sat opposite her, wiping the keys of his Casio. A carton of cleaning supplies had been waiting for them in the hall, beside a new and expensive-looking vacuum-cleaner Vianca said was German. Nothing came out of this vacuum but air, she said, so there would be no stray hairs or other traces left behind. Tito had helped his cousin Eusebio with exactly this procedure, though Eusebio had mainly had books, each of which had needed, according to protocol, to be flipped through for forgotten insertions and then wiped. The reasons for Eusebio’s departure had never been made clear to him. That too was protocol.
Link 1, Link 2, Link 3

posted by Cory Doctorow at 12:29:17 AM permalink | blogs' comments

Sunday, June 11, 2006

Japanese anti-foreigner comic warns against human rights act


Coal sez, "I've just translated and posted a rather well rendered manga from an 'emergency publication' in Japan about the dangers of protecting human rights. Japan is a little behind in legal recognition of basic human rights (including but not limited to racial discrimination etc.), and it seems the emergence of a bill to make protection of rights enforceable has a few people worried. The level of alarmism I think is particularly amusing, if that's the right word. What's also noteworthy is the constant demonising of trouble-making foreigners, and the pity the writer tries to inspire for the poor landlord who can no longer refuse to rent his house to Chinese etc. You can't make this stuff up!" Link (Thanks, Coal!)

posted by Cory Doctorow at 10:54:13 PM permalink | blogs' comments

iPod Nano boombox built into flashlight casing

Here's a monaural boombox built into the housing for a big Eveready flashlight. The speaker fits over the mouth, and it sits over a miniature amp scavenged from a set of desktop speakers and an iPod Nano with a wireless remote. Link to parts-list, Link to finished item

posted by Cory Doctorow at 10:12:36 PM permalink | blogs' comments

New Barenaked Ladies single as free, remixable multitracks

The Canadian band Barenaked Ladies have pre-released a track from their upcoming album Barenaked Ladies Are Me, in a four-track mix that's ready for remixing, and free. They're planning to do more of the same with their future releases.

I used to see BNL play at my local shopping mall, the Scarborough Town Centre, when all they'd released was an indie cassette tape with an amazing cover of Public Enemy's "Fight the Power" on it, and I'm so amazingly glad to see them still making great music. What's more, the band's frontman, Steve Page, is also fronting a group of Canadian musicians who've spoken out against DRM and suing fans and other music label shenanigans.

Best of all -- they're releasing the next album as a 15-song digital version as well as a 13-song CD, so I can get their music without having to take another piece of slow-decaying, space-hogging media into my already overcrowded home. Link (Thanks, Frank!)

posted by Cory Doctorow at 11:27:33 AM permalink | blogs' comments

Images from anti-DRM protest at the San Fran Apple Store

Here are some photos and a video from yesterday's anti-DRM protest at the Apple Store in San Francisco. Video Link, Photos Link

posted by Cory Doctorow at 11:19:28 AM permalink | blogs' comments

Saturday, June 10, 2006

Responses to Jaron Lanier's crit of online collectivism

Two weeks ago, Edge.org published Jaron Lanier's essay "Digital Maoism: The Hazards of the New Online Collectivism," critiquing the importance people are now placing on Wikipedia and other examples of the "hive mind," as people called it in the cyberdelic early 1990s. It's an engaging essay to be sure, but much more thought-provoking to me are the responses from the likes of Clay Shirky, Dan Gillmor, Howard Rheingold, our own Cory Doctorow, Douglas Rushkoff, and, of course, Jimmy Wales.

From Douglas Rushkoff:
I have a hard time fearing that the participants of Wikipedia or even the call-in voters of American Idol will be in a position to remake the social order anytime soon. And I'm concerned that any argument against collaborative activity look fairly at the real reasons why some efforts turn out the way they do. Our fledgling collective intelligences are not emerging in a vacuum, but on media platforms with very specific biases.

First off, we can't go on pretending that even our favorite disintermediation efforts are revolutions in any real sense of the word. Projects like Wikipedia do not overthrow any elite at all, but merely replace one elite — in this case an academic one — with another: the interactive media elite...

While it may be true that a large number of current websites and group projects contain more content aggregation (links) than original works (stuff), that may as well be a critique of the entirety of Western culture since post-modernism. I'm as tired as anyone of art and thought that exists entirely in the realm of context and reference — but you can't blame Wikipedia for architecture based on winks to earlier eras or a music culture obsessed with sampling old recordings instead of playing new compositions.

Honestly, the loudest outcry over our Internet culture's inclination towards re-framing and the "meta" tend to come from those with the most to lose in a society where "credit" is no longer a paramount concern. Most of us who work in or around science and technology understand that our greatest achievements are not personal accomplishments but lucky articulations of collective realizations. Something in the air... Claiming authorship is really just a matter of ego and royalties.
From Cory Doctorow:
Wikipedia isn't great because it's like the Britannica. The Britannica is great at being authoritative, edited, expensive, and monolithic. Wikipedia is great at being free, brawling, universal, and instantaneous.
From Jimmy Wales (italics indicate quotes from Jaron's original essay):
"A core belief of the wiki world is that whatever problems exist in the wiki will be incrementally corrected as the process unfolds."

My response is quite simple: this alleged "core belief" is not one which is held by me, nor as far as I know, by any important or prominent Wikipedians. Nor do we have any particular faith in collectives or collectivism as a mode of writing. Authoring at Wikipedia, as everywhere, is done by individuals exercising the judgment of their own minds.

"The best guiding principle is to always cherish individuals first."

Indeed.
Link

UPDATE: Jaron Lanier writes us that he's received a lot of negative feedback from people who he thinks may not have actually read his original essay:
In the essay i criticized the desire (that has only recently become influential) to create an "oracle effect" out of anonymity on the internet - that's the thing i identified as being a new type of collectivism, but i did not make that accusation against the wikipedia - or against social cooperation on the net, which is something i was an early true believer in- if i remember those weird days well, i think i even made up some of the rhetoric and terminology that is still associated with net advocacy today- anyway, i specifically exempted many internet gatherings from my criticism, including the wikipedia, boingboing, google, cool tools... and also the substance of the essay was not accusatory but constructive- the three rules i proposed for creating effective feedback links to the "hive mind" being one example.

posted by David Pescovitz at 10:07:03 PM permalink | blogs' comments

Spanish castle optical effect

This has been going around for a couple of days, but I just found out about it. It's a neat optical effect -- you stare at a color negative of a photo for 30 seconds (or even just 15), then move the mouse over the photo, keeping your eyes on the black dot. The photo appears in color, until you move your eyes. Link

posted by Mark Frauenfelder at 08:38:10 AM permalink | blogs' comments

Friday, June 9, 2006

EFF podcast: How we kept caching legal

Danny sez, "Line Noise is the new EFF podcast (RSS or iTunes); this week's episode is a chat with EFF's IP attorney Fred von Lohmann on the background to the Section 115 Reform Act (previously on Boing Boing). He explains how a good bill was used to sneak in bad precedents - including the insane idea that all temporarily cached copies on the Net and in RAM should be copyrightable and subject to licensing.

"Good news on that, by the way -- thanks to your calls and comments, the committee have slowed the pace of this fast-track bill, and are now working to fix the bill's language. Everyone from the Copyright Office to Radio Shack and BellSouth have now commented on the problems, so there's an excellent chance of a clear resolution." Link

posted by Cory Doctorow at 11:55:41 PM permalink | blogs' comments

Why do these people have characters on their foreheads?

Nick of Square America invites you to solve a mystery.
 Blogger 5842 2554 1600 F2 I got this lot of slides about three years ago and I've never been able to figure out just what is going on. There are about 50 slides in all- all dating from between 1959 and 1969 and all of young women. Some, like the ones here have letters written on their foreheads, others have press type with their names on it affixed to either their temples or foreheads. Were the slides taken by a dermatologist or plastic surgeon or were these young women part of some now forgotten experiment.
Link

posted by Mark Frauenfelder at 03:28:19 PM permalink | blogs' comments

Trailer for 2007 Disney Pixar movie: Ratatouille

Picture 2-9 Apple has the trailer for the next Disney Pixar movie coming out in 2007. It's called Ratatouille and it appears to be about a Parisian rat (without a phony French accent) who, unlike other rats in his family, insists on eating only the finest food served in Paris' best restaurants.

The quality of the video is really nice. Don't you wish YouTube looked half as nice as this? Link

posted by Mark Frauenfelder at 02:48:18 PM permalink | blogs' comments

More great old illustrations from BibliOdyssey

 Blogger 1717 1584 1600 Begnino-Bossi-1771-Petitot  Blogger 1717 1584 1600 Heemskerck-.-Lyons
Why are so many drawings from earlier centuries so deliciously weird? Here are a couple I came across on one of my favorite blogs, BibliOdyssey. Link

posted by Mark Frauenfelder at 02:28:39 PM permalink | blogs' comments

Play the World Cup with a stream of urine

200606091411Funny photo of a urinal with a small ball and goal in it. Link

posted by Mark Frauenfelder at 02:12:35 PM permalink | blogs' comments

Wired News tells how to watch FIFA World Cup for free online

Worldcup-1 A while back, law firm Baker & McKenzie sent Boing Boing a snippy letter warning us not to do something we wouldn't do even if they begged us -- broadcast live streams of the FIFA World Cup.

I wonder if Baker & McKenzie will send Wired News a letter complaining that Wired News is facilitating piracy for explaining a variety of ways in which FIFA World Cup fans can enjoy live video streams of the tournament on their computers without paying the rightsholder, Infront Sports & Media? Link

(Image courtesy groovehouse of The Grooveblog.)

posted by Mark Frauenfelder at 02:01:40 PM permalink | blogs' comments

Archives
hpricot-0.8.6/test/files/bnqt.html0000644000175000017500000020677111710073440016500 0ustar boutilboutil colinbane.bnqt.com - - Skate life - 1

BNQT Logo

  •  COLINBANE.BNQT.COM: Skate life

FeaturesArchives

November 20, 2009 » Blogs

Skate and Annoy is back at it, pass the word
by: Colin Bane

Spread the word: One of my favorite sites, Skate And Annoy, is annoying again.

Here's a quick note from the editor, Kilwag:

After a really rocky month and many false starts, Skate and Annoy has been back up for over a week now. It seems like we've lost about 40-50% of our traffic, but the trend is creeping upward. Do me a favor and spread the word that we're here, (we're queer- or mongo? goofy?) get used to it.

November 11, 2009 » Blogs

Tony Hawk, 'coolest guy ever to wear a dorky helmet,' on NPR 'Not My Job'
by: Colin Bane

Tony Hawk was on NPR's "Not My Job" show this week, answering some ridiculous "Hey! You could put an eye out!" questions about safety measures not at all related to skateboarding. He also takes on the stock "I've been dying to ask this question: How many broken bones have you had in your lifetime?" question from one of the show's hosts, talks about his legacy of Madonnas and Sean Penns, and mentions the trumped-up pundit outrage after he skateboarded in the White House.

"What did they expect you to do, walk?"

Tony Hawk on NPR? Dude's everywhere. As long as it ultimately helps get more killer concrete skateparks put up all over the damned place, I'm cool with it.

Have a listen:

November 10, 2009 » Blogs

Skateboard Evolution & Art in California opens 11/14, California Heritage Museum
by: Colin Bane

Skateboard art is getting big props for its role in California culture this weekend: The California Heritage Museum in Santa Monica opens its new Skateboard Evolution & Art exhibit on Saturday.

Here's the full press release via California Heritage Museum:

The California Heritage Museum is proud to present SKATEBOARD: Evolution and Art in California. The show opens to the general public on Saturday, November 14, 2009 and continues through Sunday, May 30, 2010.

Guest curated by legendary Z-boy Nathan Pratt in conjunction with museum staff curator Michael Trotter and advisory committee members Jeff Ho, Skip Engblom (Zephyr co-founders), Cris Dawson (1966 Hobie Champ), Z-Boys Tony Alva and Stacy Peralta, and 1980s world champion Christian Hosoi, the exhibition traces the evolution of boards from pre-1950 to the present showcasing the riders, designers, artists, and manufacturers that created the California phenomenon known as the skateboard. From the first planks of wood with metal roller skate wheels nailed to the bottom to the modern polyurethane wheels and kicktails, the boards have rocketed the skaters to greater heights of performance and style while taking sidewalk surfing from a homegrown activity to a worldwide cultural phenomenon.

More than 275 rare boards from the world's finest collections including Jason Cohn, Dale Smith/Skate Designs Inc., Todd Huber/Skatelab Skatepark, Ray Flores, James Lang/South Bay Skates.

Additional lenders included are Kevin Anderson/Model Worm, Art Brewer, Terry Campion, Cris Dawson, Deluxe Distribution, Earl, Skip Engblom, Craig Fineman, Wayne and Donna Gunter/Surfing Cowboys, Mike Horelick/Tunnel Skateboards, Chuck Katx, Mike Kolar, Gordon McClelland, Jim McDowell/RIPWynn City Skates, Marc McKee, Aaron Murray, James O'Mahoney/Santa Barbara Surfing Museum, Nathan Pratt, Mark Richards/Val Surf, Ronnie/Animal House, Steve Salyer/Pastures of Heaven, Gabriel Steptoe, Joel Tudor, Wentzle Ruml IV, Cary B. Weiss, Z-BOY Archive, and Z-CULT Skates. Photographs and art by C.R. Stecyk III, Glen E. Friedman, Craig Fineman, Wynn Miller, Kevin Ancell, Wes Humpston and more document the movement and its personalities.

This is the first exhibition of the California skate movement to be shown in "Dog Town", the Santa Monica/Ocean Park area where modern skateboarding was born and the skate became an art form. The California Heritage Museum is located in the vortex at Main Street and Ocean Park Boulevard.

A full series of events, including film screenings of Dogtown and Z-Boys, Skater Dater and Rising Son: The Christian Hosoi Story, will take place at the museum and the downtown Santa Monica Library. Talks, photo exhibits, special guest appearances and autograph sessions will take place throughout the exhibit run into 2010.

Opening November 14, 2009

November 03, 2009 » Blogs

Etnies pimps out a Casa Surf suite at La Casa del Camino
by: Colin Bane

My FUEL.TV colleague Carlton Curtis is up today with a sweet slideshow from the new Casa Surf project at La Casa del Camino in Laguna Beach, CA, including this shot from the Etnies Suite.

Via FUEL.TV:

Talk about "extreme" makeovers. Orange County's Riviera Magazine invited ten SoCal surf-and-skate brands to imagine what their dream room would look like — the results of which can now be found at the Riviera Design Series' "Casa Surf Project". It's a novel concept, really: Brands like Quiksilver, Billabong, and Etnies have already conquered the apparel game, so why not spruce up the place where we end up after a day of ripping? Home sweet home.

October 28, 2009 » Blogs

Chaz Ortiz: 'Officially Down' with ZY
by: Colin Bane

Looks like Zoo York is giving up the game of pretending Chaz Ortiz isn't a pro, after winning the 2008 Dew Tour and little events like the... uh... PlayStation Pro, and then holding his own for another year of pro competition all over the place in 2009.

Peep this new ZY ad: Chaz Ortiz is Officially Down.

Recent Comments

Chaz Ortiz: \'Officially Down\' with ZY

By: hammers young

On:

A PHP Error was encountered

Severity: Notice

Message: Undefined index: HTTP_USER_AGENT

Filename: template/container.php(14) : eval()'d code

Line Number: 27

Chaz Ortiz: 'Officially Down' with

Axel Cruysberghs Wins 2009 European Skateboard Championships

By: cici

On:

A PHP Error was encountered

Severity: Notice

Message: Undefined index: HTTP_USER_AGENT

Filename: template/container.php(14) : eval()'d code

Line Number: 27

Axel Cruysberghs Wins 2009 Europea

Mukee Design: Recycled Skateboard Jewelry

By: Chris

On:

A PHP Error was encountered

Severity: Notice

Message: Undefined index: HTTP_USER_AGENT

Filename: template/container.php(14) : eval()'d code

Line Number: 27

Mukee Design: Recycled Skateboard

Dew Tour Debuts Women\'s Comp at ISF Skate Open; Ladies\' Lineup Announced

By: cheap-prada.com

On:

A PHP Error was encountered

Severity: Notice

Message: Undefined index: HTTP_USER_AGENT

Filename: template/container.php(14) : eval()'d code

Line Number: 27

Dew Tour Debuts Women's Comp at IS

Sneak Peek: National Museum of American Indian \'Ramp It Up: Skateboard Culture in Native America\'

By: nativeskatepark

On:

A PHP Error was encountered

Severity: Notice

Message: Undefined index: HTTP_USER_AGENT

Filename: template/container.php(14) : eval()'d code

Line Number: 27

Sneak Peek: National Museum of Ame

Sneak Peek: National Museum of American Indian \'Ramp It Up: Skateboard Culture in Native America\'

By: nativeskatepark

On:

A PHP Error was encountered

Severity: Notice

Message: Undefined index: HTTP_USER_AGENT

Filename: template/container.php(14) : eval()'d code

Line Number: 27

Sneak Peek: National Museum of Ame

hpricot-0.8.6/test/files/basic.xhtml0000644000175000017500000000130411710073440016766 0ustar boutilboutil Sample XHTML

Sample XHTML for MouseHole 2.

Please filter me!

The third paragraph

THE FINAL PARAGRAPH

hpricot-0.8.6/Rakefile0000644000175000017500000001725311710073440014225 0ustar boutilboutilrequire 'bundler/setup' ENV.delete('RUBYOPT') # Don't propagate RUBYOPT/Bundler to subprocesses require 'rake/clean' require 'rubygems/package_task' require 'rdoc/task' require 'rake/testtask' begin require 'rake/extensiontask' rescue LoadError abort "To build, please first gem install rake-compiler" end RbConfig = Config unless defined?(RbConfig) NAME = "hpricot" REV = (`#{ENV['GIT'] || "git"} rev-list HEAD`.split.length + 1).to_s VERS = ENV['VERSION'] || "0.8" + (REV ? ".#{REV}" : "") PKG = "#{NAME}-#{VERS}" BIN = "*.{bundle,jar,so,o,obj,pdb,lib,def,exp,class,rbc}" CLEAN.include ["#{BIN}", "ext/**/#{BIN}", "lib/**/#{BIN}", "test/**/#{BIN}", 'ext/fast_xs/Makefile', 'ext/hpricot_scan/Makefile', '**/.*.sw?', '*.gem', '.config', 'pkg', 'lib/hpricot_scan.rb', 'lib/fast_xs.rb'] RDOC_OPTS = ['--quiet', '--title', 'The Hpricot Reference', '--main', 'README.md', '--inline-source'] PKG_FILES = %w(CHANGELOG COPYING README.md Rakefile) + Dir.glob("{bin,doc,test,extras}/**/*") + (Dir.glob("lib/**/*.rb") - %w(lib/hpricot_scan.rb lib/fast_xs.rb)) + Dir.glob("ext/**/*.{h,java,c,rb,rl}") + %w[ext/hpricot_scan/hpricot_scan.c ext/hpricot_scan/hpricot_css.c ext/hpricot_scan/HpricotScanService.java] # needed because they are generated later RAGEL_C_CODE_GENERATION_STYLES = { "table_driven" => 'T0', "faster_table_driven" => 'T1', "flat_table_driven" => 'F0', "faster_flat_table_driven" => 'F1', "goto_driven" => 'G0', "faster_goto_driven" => 'G1', "really_fast goto_driven" => 'G2' # "n_way_split_really_fast_goto_driven" => 'P' } DEFAULT_RAGEL_C_CODE_GENERATION = "really_fast goto_driven" SPEC = Gem::Specification.new do |s| s.name = NAME s.version = VERS s.platform = Gem::Platform::RUBY s.has_rdoc = true s.rdoc_options += RDOC_OPTS s.extra_rdoc_files = ["README.md", "CHANGELOG", "COPYING"] s.summary = "a swift, liberal HTML parser with a fantastic library" s.description = s.summary s.author = "why 
the lucky stiff" s.email = 'why@ruby-lang.org' s.homepage = 'http://code.whytheluckystiff.net/hpricot/' s.rubyforge_project = 'hobix' s.files = PKG_FILES s.require_paths = ["lib"] s.extensions = FileList["ext/**/extconf.rb"].to_a s.bindir = "bin" end # Dup the spec before any of its calculated ivars are set (e.g., #cache_file) Win32Spec = SPEC.dup JRubySpec = SPEC.dup # FAT cross-compile # Pass RUBY_CC_VERSION=1.8.7:1.9.2 when packaging for 1.8+1.9 mswin32 binaries %w(hpricot_scan fast_xs).each do |target| Rake::ExtensionTask.new(target, SPEC) do |ext| ext.lib_dir = File.join('lib', target) if ENV['RUBY_CC_VERSION'] ext.cross_compile = true # enable cross compilation (requires cross compile toolchain) ext.cross_platform = 'i386-mswin32' # forces the Windows platform instead of the default one end # HACK around 1.9.2 cross .def file creation def_file = "tmp/i386-mswin32/#{target}/1.9.2/#{target}-i386-mingw32.def" directory File.dirname(def_file) file def_file => File.dirname(def_file) do |t| File.open(t.name, "w") do |f| f << "EXPORTS\nInit_#{target}\n" end end task File.join(File.dirname(def_file), "Makefile") => def_file # END HACK file "lib/#{target}.rb" do |t| File.open(t.name, "w") do |f| f.puts %{require "#{target}/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/#{target}"} end end end file 'ext/hpricot_scan/extconf.rb' => :ragel desc "set environment variables to build and/or test with debug options" task :debug do ENV['CFLAGS'] ||= "" ENV['CFLAGS'] += " -g -DDEBUG" end desc "Does a full compile, test run" if defined?(JRUBY_VERSION) task :default => [:compile_java, :clean_fat_rb, :test] else task :default => [:compile, :clean_fat_rb, :test] end task :clean_fat_rb do rm_f "lib/hpricot_scan.rb" rm_f "lib/fast_xs.rb" end desc "Packages up Hpricot for all platforms." 
# `rake package` always starts from a clean tree.
task :package => [:clean]

desc "Run all the tests"
Rake::TestTask.new do |test_task|
  test_task.libs << "test"
  test_task.test_files = FileList['test/test_*.rb']
  test_task.verbose = true
end

# API documentation, rendered with README.md as the main page.
Rake::RDocTask.new do |rd|
  rd.main = "README.md"
  rd.rdoc_dir = 'doc/rdoc'
  rd.options += RDOC_OPTS
  rd.rdoc_files.add(['README.md', 'CHANGELOG', 'COPYING', 'lib/**/*.rb'])
end

### Source Package ###

# The source gem is the only package that sets need_tar.
Gem::PackageTask.new(SPEC) do |pkg|
  pkg.need_tar = true
  pkg.gem_spec = SPEC
end

### Win32 Packages ###

Win32Spec.platform = 'i386-mswin32'

# For each extension the fat gem ships a loader stub plus one prebuilt
# binary per Ruby series (1.8 and 1.9).
win32_fat_files = %w(hpricot_scan fast_xs).map do |ext_name|
  if ENV['RUBY_CC_VERSION'].nil?
    # Without RUBY_CC_VERSION, register a file task for the 1.8 binary whose
    # action aborts with re-run instructions.
    file "lib/#{ext_name}/1.8/#{ext_name}.so" do
      abort "ERROR while packaging: re-run for fat win32 gems:\nrake #{ARGV.join(' ')} RUBY_CC_VERSION=1.8.7:1.9.2"
    end
  end

  ["lib/#{ext_name}.rb", "lib/#{ext_name}/1.8/#{ext_name}.so", "lib/#{ext_name}/1.9/#{ext_name}.so"]
end.flatten

Win32Spec.files = PKG_FILES + win32_fat_files
# The binaries are listed in files, so no extconf.rb entries are kept.
Win32Spec.extensions = []

Gem::PackageTask.new(Win32Spec) do |pkg|
  pkg.need_tar = false
  pkg.gem_spec = Win32Spec
end

### JRuby Packages ###

JRubySpec.platform = 'java'
JRubySpec.files = PKG_FILES + ["lib/hpricot_scan.jar", "lib/fast_xs.jar"]
JRubySpec.extensions = []

Gem::PackageTask.new(JRubySpec) do |pkg|
  pkg.need_tar = false
  pkg.gem_spec = JRubySpec
end

### Ragel ###

desc "Determines the Ragel version and displays it on the console along with the location of the Ragel binary."
task :ragel_version do
  version_banner = `ragel -v`
  # The second capture group is the token following "version "; to_f turns
  # it into a float, which the Ragel tasks compare against 6.1.
  @ragel_v = version_banner[/(version )(\S*)/, 2].to_f
  ragel_location = `which ragel`
  puts "Using ragel version: #{@ragel_v}, location: #{ragel_location}"
  @ragel_v
end

desc "Generates the C scanner code with Ragel."
task :ragel => [:ragel_version] do
  # Guard first: refuse to generate code with a Ragel older than 6.1.
  unless @ragel_v >= 6.1
    STDERR.puts "Ragel 6.1 or greater is required."
    exit(1)
  end

  @ragel_c_code_generation_style = RAGEL_C_CODE_GENERATION_STYLES[DEFAULT_RAGEL_C_CODE_GENERATION]
  Dir.chdir("ext/hpricot_scan") do
    # Scanner first, then the CSS selector machine; both use the same style flag.
    %w(hpricot_scan hpricot_css).each do |machine|
      sh %{ragel #{machine}.rl -#{@ragel_c_code_generation_style} -o #{machine}.c}
    end
  end
end

# Java only supports the table-driven code
# generation style at this point.
desc "Generates the Java scanner code using the Ragel table-driven code generation style." task :ragel_java => [:ragel_version] do if @ragel_v >= 6.1 puts "compiling with ragel version #{@ragel_v}" Dir.chdir("ext/hpricot_scan") do sh %{ragel -J -o HpricotCss.java hpricot_css.java.rl} sh %{ragel -J -o HpricotScanService.java hpricot_scan.java.rl} end else STDERR.puts "Ragel 6.1 or greater is required." exit(1) end end ### JRuby Compile ### def java_classpath_arg # myriad of ways to discover JRuby classpath begin cpath = Java::java.lang.System.getProperty('java.class.path').split(File::PATH_SEPARATOR) cpath += Java::java.lang.System.getProperty('sun.boot.class.path').split(File::PATH_SEPARATOR) jruby_cpath = cpath.compact.join(File::PATH_SEPARATOR) rescue => e end unless jruby_cpath jruby_cpath = ENV['JRUBY_PARENT_CLASSPATH'] || ENV['JRUBY_HOME'] && FileList["#{ENV['JRUBY_HOME']}/lib/*.jar"].join(File::PATH_SEPARATOR) end unless jruby_cpath || ENV['CLASSPATH'] =~ /jruby/ abort %{WARNING: No JRuby classpath has been set up. Define JRUBY_HOME=/path/to/jruby on the command line or in the environment} end "-cp \"#{jruby_cpath}\"" end def compile_java(filenames, jarname) sh %{javac -source 1.5 -target 1.5 #{java_classpath_arg} #{filenames.join(" ")}} sh %{jar cf #{jarname} *.class} end task :hpricot_scan_java => [:ragel_java] do Dir.chdir "ext/hpricot_scan" do compile_java(["HpricotScanService.java", "HpricotCss.java"], "hpricot_scan.jar") end end task :fast_xs_java do Dir.chdir "ext/fast_xs" do compile_java(["FastXsService.java"], "fast_xs.jar") end end %w(hpricot_scan fast_xs).each do |ext| file "lib/#{ext}.jar" => "#{ext}_java" do |t| mv "ext/#{ext}/#{ext}.jar", "lib" end task :compile_java => "lib/#{ext}.jar" end hpricot-0.8.6/README.md0000644000175000017500000002224511710073440014034 0ustar boutilboutil# Hpricot, Read Any HTML Hpricot is a fast, flexible HTML parser written in C. 
It's designed to be very accommodating (like Tanaka Akira's HTree) and to have a very helpful library (like some JavaScript libs -- JQuery, Prototype -- give you.) The XPath and CSS parser, in fact, is based on John Resig's JQuery. Also, Hpricot can be handy for reading broken XML files, since many of the same techniques can be used. If a quote is missing, Hpricot tries to figure it out. If tags overlap, Hpricot works on sorting them out. You know, that sort of thing. *Please read this entire document* before making assumptions about how this software works. ## An Overview Let's clear up what Hpricot is. * Hpricot is *a standalone library*. It requires no other libraries. Just Ruby! * While priding itself on speed, Hpricot *works hard to sort out bad HTML* and pays a small penalty in order to get that right. So that's slightly more important to me than speed. * *If you can see it in Firefox, then Hpricot should parse it.* That's how it should be! Let me know the minute it's otherwise. * Primarily, Hpricot is used for reading HTML and tries to sort out troubled HTML by having some idea of what good HTML is. Some people still like to use Hpricot for XML reading, but *remember to use the Hpricot::XML() method* for that! ## The Hpricot Kingdom First, here are all the links you need to know: * http://wiki.github.com/hpricot/hpricot is the Hpricot wiki and http://github.com/hpricot/hpricot/issues is the bug tracker. Go there for news and recipes and patches. It's the center of activity. * http://github.com/hpricot/hpricot is the main Git repository for Hpricot. You can get the latest code there. * See COPYING for the terms of this software. (Spoiler: it's absolutely free.) If you have any trouble, don't hesitate to contact the author. As always, I'm not going to say "Use at your own risk" because I don't want this library to be risky. If you trip on something, I'll share the liability by repairing things as quickly as I can. 
Your responsibility is to report the inadequacies. ## Installing Hpricot You may get the latest stable version from Rubyforge. Win32 binaries, Java binaries (for JRuby), and source gems are available. $ gem install hpricot ## An Hpricot Showcase We're going to run through a big pile of examples to get you jump-started. Many of these examples are also found at http://wiki.github.com/hpricot/hpricot/hpricot-basics, in case you want to add some of your own. ### Loading Hpricot Itself You have probably got the gem, right? To load Hpricot: require 'rubygems' require 'hpricot' If you've installed the plain source distribution, go ahead and just: require 'hpricot' ### Load an HTML Page The Hpricot() method takes a string or any IO object and loads the contents into a document object. doc = Hpricot("

A simple test string.

") To load from a file, just get the stream open: doc = open("index.html") { |f| Hpricot(f) } To load from a web URL, use open-uri, which comes with Ruby: require 'open-uri' doc = open("http://qwantz.com/") { |f| Hpricot(f) } Hpricot uses an internal buffer to parse the file, so the IO will stream properly and large documents won't be loaded into memory all at once. However, the parsed document object will be present in memory, in its entirety. ### Search for Elements Use Doc.search: doc.search("//p[@class='posted']") #=> # Doc.search can take an XPath or CSS expression. In the above example, all paragraph

elements are grabbed which have a class attribute of "posted". A shortcut is to use the divisor: (doc/"p.posted") #=> # ### Finding Just One Element If you're looking for a single element, the at method will return the first element matched by the expression. In this case, you'll get back the element itself rather than the Hpricot::Elements array. doc.at("body")['onload'] The above code will find the body tag and give you back the onload attribute. This is the most common reason to use the element directly: when reading and writing HTML attributes. ### Fetching the Contents of an Element Just as with browser scripting, the inner_html property can be used to get the inner contents of an element. (doc/"#elementID").inner_html #=> "..contents.." If your expression matches more than one element, you'll get back the contents of ''all the matched elements''. So you may want to use first to be sure you get back only one. (doc/"#elementID").first.inner_html #=> "..contents.." ### Fetching the HTML for an Element If you want the HTML for the whole element (not just the contents), use to_html: (doc/"#elementID").to_html #=> "

...
" ### Looping All searches return a set of Hpricot::Elements. Go ahead and loop through them like you would an array. (doc/"p/a/img").each do |img| puts img.attributes['class'] end ### Continuing Searches Searches can be continued from a collection of elements, in order to search deeper. # find all paragraphs. elements = doc.search("/html/body//p") # continue the search by finding any images within those paragraphs. (elements/"img") #=> # Searches can also be continued by searching within container elements. # find all images within paragraphs. doc.search("/html/body//p").each do |para| puts "== Found a paragraph ==" pp para imgs = para.search("img") if imgs.any? puts "== Found #{imgs.length} images inside ==" end end Of course, the most succinct ways to do the above are using CSS or XPath. # the xpath version (doc/"/html/body//p//img") # the css version (doc/"html > body > p img") # ..or symbols work, too! (doc/:html/:body/:p/:img) ### Looping Edits You may certainly edit objects from within your search loops. Then, when you spit out the HTML, the altered elements will show. (doc/"span.entryPermalink").each do |span| span.attributes['class'] = 'newLinks' end puts doc This changes all span.entryPermalink elements to span.newLinks. Keep in mind that there are often more convenient ways of doing this. Such as the set method: (doc/"span.entryPermalink").set(:class => 'newLinks') ### Figuring Out Paths Every element can tell you its unique path (either XPath or CSS) to get to the element from the root tag. The css_path method: doc.at("div > div:nth(1)").css_path #=> "div > div:nth(1)" doc.at("#header").css_path #=> "#header" Or, the xpath method: doc.at("div > div:nth(1)").xpath #=> "/div/div:eq(1)" doc.at("#header").xpath #=> "//div[@id='header']" ## Hpricot Fixups When loading HTML documents, you have a few settings that can make Hpricot more or less intense about how it gets involved. 
## :fixup_tags Really, there are so many ways to clean up HTML and your intentions may be to keep the HTML as-is. So Hpricot's default behavior is to keep things flexible. Making sure to open and close all the tags, but ignore any validation problems. As of Hpricot 0.4, there's a new :fixup_tags option which will attempt to shift the document's tags to meet XHTML 1.0 Strict. doc = open("index.html") { |f| Hpricot f, :fixup_tags => true } This doesn't quite meet the XHTML 1.0 Strict standard, it just tries to follow the rules a bit better. Like: say Hpricot finds a paragraph in a link, it's going to move the paragraph below the link. Or up and out of other elements where paragraphs don't belong. If an unknown element is found, it is ignored. Again, :fixup_tags. ## :xhtml_strict So, let's go beyond just trying to fix the hierarchy. The :xhtml_strict option really tries to force the document to be an XHTML 1.0 Strict document. Even at the cost of removing elements that get in the way. doc = open("index.html") { |f| Hpricot f, :xhtml_strict => true } What measures does :xhtml_strict take? 1. Shift elements into their proper containers just like :fixup_tags. 2. Remove unknown elements. 3. Remove unknown attributes. 4. Remove illegal content. 5. Alter the doctype to XHTML 1.0 Strict. ## Hpricot.XML() The last option is the :xml option, which makes some slight variations on the standard mode. The main difference is that :xml mode won't try to output tags which are friendlier for browsers. For example, if an opening and closing br tag is found, XML mode won't try to turn that into an empty element. XML mode also doesn't downcase the tags and attributes for you. So pay attention to case, friends. 
The primary way to use Hpricot's XML mode is to call the Hpricot.XML method: doc = open("http://redhanded.hobix.com/index.xml") do |f| Hpricot.XML(f) end *Also, :fixup_tags is canceled out by the :xml option.* This is because :fixup_tags makes assumptions based on how HTML is structured. Specifically, how tags are defined in the XHTML 1.0 DTD. hpricot-0.8.6/COPYING0000644000175000017500000000202211710073440013577 0ustar boutilboutilCopyright (c) 2006 why the lucky stiff Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
hpricot-0.8.6/CHANGELOG0000644000175000017500000001315311710073440013765 0ustar boutilboutil= 0.8.6 === 17 January 2012 * Allow any tags to contain unknown tags (Steven Parkes) = 0.8.5 === 29 November 2011 * Remove escaped quote (\') from matching (#55) * Fix 'undefined method downcase for nil:NilClass' on JRuby (#58) * Unescape hex numeric character references = 0.8.4 === 28 February, 2011 * GH #21, #32, #33, #36: Fix for reported segfaults = 0.8.3 === 3 November, 2010 * GH#8: Nil-check before downcasing attribute key * GH#25: Proper ruby 1.9 encoding support * GH#28. Use integers instead of ?? on 1.9, which is just a string. * including noscript to ElementInclusions , so that hpricot wont fail when trying to parse a meta tag inside head section when noscript is present. * latest changes from fast_xs mainline * Fixes to get Hpricot running on Rubinius: * Use free, not XFREE * Remove RSTRUCT craziness, don't break Array#at = 0.8.2 === 5 November, 2009 * Bring JRuby support up to speed, including Java-based hpricot_css support * Change JRuby fast_xs to have same escaping behavior as C fast_xs * fix for issue #2, downcasing of html attributes inside the parser. * solve issue #3 with bogus etags being preserved in `to_s` rather than just `to_original_html`. * fix error when attempting to reparent cleared node. (issue #5) * Hpricot::Attributes proxy object for using `ele.attributes[k] = v` directly. however, it is preferred to use the jquery-like `elements.attr(k, v)`. = 0.8.1 === 3 April, 2009 * big problems on Ruby 1.8.6, use INT2FIX instead of INT2NUM. hashes were being cast to bignums. * patch for 1.8.5 to define RARRAY_PTR. thanks, mike perham! * inspecting empty document bug, courtesy of @TalLevAmi. = 0.8 === 31st March, 2009 * Saving memory and speed by using RStruct-based elements in the C extension. * Bug in tag parsing, causing runaway