ox-2.11.0/0000755000004100000410000000000013502763477012267 5ustar www-datawww-dataox-2.11.0/README.md0000644000004100000410000002035713502763477013555 0ustar www-datawww-data# Ox gem A fast XML parser and Object marshaller as a Ruby gem. [![Build Status](https://secure.travis-ci.org/ohler55/ox.svg?branch=master)](http://travis-ci.org/ohler55/ox) [![TideLift](https://tidelift.com/badges/github/ohler55/ox)](https://tidelift.com/subscription/pkg/rubygems-ox?utm_source=rubygems-ox&utm_medium=referral&utm_campaign=readme) ## Installation gem install ox ## Documentation *Documentation*: http://www.ohler.com/ox ## Source *GitHub* *repo*: https://github.com/ohler55/ox *RubyGems* *repo*: https://rubygems.org/gems/ox ## Follow @oxgem on Twitter [Follow @peterohler on Twitter](http://twitter.com/#!/peterohler) for announcements and news about the Ox gem. ## Support [Get supported Ox with a Tidelift Subscription.](https://tidelift.com/subscription/pkg/rubygems-ox?utm_source=rubygems-ox&utm_medium=referral&utm_campaign=readme) ## Links of Interest [Ruby XML Gem Comparison](http://www.ohler.com/dev/xml_with_ruby/xml_with_ruby.html) for a performance comparison between Ox, Nokogiri, and LibXML. [Fast Ruby XML Serialization](http://www.ohler.com/dev/ruby_object_xml_serialization/ruby_object_xml_serialization.html) to see how Ox can be used as a faster replacement for Marshal. *Fast JSON parser and marshaller on RubyGems*: https://rubygems.org/gems/oj *Fast JSON parser and marshaller on GitHub*: https://github.com/ohler55/oj ## Release Notes See [CHANGELOG.md](CHANGELOG.md) ## Description Optimized XML (Ox), as the name implies was written to provide speed optimized XML and now HTML handling. It was designed to be an alternative to Nokogiri and other Ruby XML parsers in generic XML parsing and as an alternative to Marshal for Object serialization. Unlike some other Ruby XML parsers, Ox is self contained. 
Ox uses nothing other than standard C libraries so version issues with libXml are not an issue. Marshal uses a binary format for serializing Objects. That binary format changes with releases, making Marshal-dumped Objects incompatible between some versions. The use of a binary format makes debugging message streams or file contents next to impossible unless the same version of Ruby and only Ruby is used for inspecting the serialized Object. Ox on the other hand uses human readable XML. Ox also includes options that allow strict, tolerant, or a mode that automatically defines missing classes. It is possible to write an XML serialization gem with Nokogiri or other XML parsers but writing such a package in Ruby results in a module significantly slower than Marshal. This is what triggered the start of Ox development. Ox handles XML documents in three ways. It is a generic XML parser and writer, a fast Object / XML marshaller, and a stream SAX parser. Ox was written for speed as a replacement for Nokogiri, Ruby LibXML, and for Marshal. As an XML parser it is 2 or more times faster than Nokogiri and as a generic XML writer it is as much as 20 times faster than Nokogiri. Of course different files may result in slightly different times. As an Object serializer Ox is up to 6 times faster than the standard Ruby Marshal.dump() and up to 3 times faster than Marshal.load(). The SAX like stream parser is 40 times faster than Nokogiri and more than 13 times faster than LibXML when validating a file with minimal Ruby callbacks. Unlike Nokogiri and LibXML, Ox can be tuned to use only the SAX callbacks that are of interest to the caller. (See the perf_sax.rb file for an example.) Ox is compatible with Ruby 1.8.7, 1.9.3, 2.1.2, 2.2.0 and RBX. ### Object Dump Sample: ```ruby require 'ox' class Sample attr_accessor :a, :b, :c def initialize(a, b, c) @a = a @b = b @c = c end end # Create Object obj = Sample.new(1, "bee", ['x', :y, 7.0]) # Now dump the Object to an XML String. 
xml = Ox.dump(obj) # Convert the object back into a Sample Object. obj2 = Ox.parse_obj(xml) ``` ### Generic XML Writing and Parsing: ```ruby require 'ox' doc = Ox::Document.new(:version => '1.0') top = Ox::Element.new('top') top[:name] = 'sample' doc << top mid = Ox::Element.new('middle') mid[:name] = 'second' top << mid bot = Ox::Element.new('bottom') bot[:name] = 'third' mid << bot xml = Ox.dump(doc) # xml = # # # # # doc2 = Ox.parse(xml) puts "Same? #{doc == doc2}" # true ``` ### HTML Parsing: Ox can be used to parse HTML with a few options changes. HTML is often loose in regard to conformance. For HTML parsing try these options. ```ruby Ox.default_options = { mode: :generic, effort: :tolerant, smart: true } ``` ### SAX XML Parsing: ```ruby require 'stringio' require 'ox' class Sample < ::Ox::Sax def start_element(name); puts "start: #{name}"; end def end_element(name); puts "end: #{name}"; end def attr(name, value); puts " #{name} => #{value}"; end def text(value); puts "text #{value}"; end end io = StringIO.new(%{ }) handler = Sample.new() Ox.sax_parse(handler, io) # outputs # start: top # name => sample # start: middle # name => second # start: bottom # name => third # end: bottom # end: middle # end: top ``` ### Yielding results immediately while SAX XML Parsing: ```ruby require 'stringio' require 'ox' class Yielder < ::Ox::Sax def initialize(block); @yield_to = block; end def start_element(name); @yield_to.call(name); end end io = StringIO.new(%{ }) proc = Proc.new { |name| puts name } handler = Yielder.new(proc) puts "before parse" Ox.sax_parse(handler, io) puts "after parse" # outputs # before parse # top # middle # bottom # after parse ``` ### Parsing XML into a Hash (fast) ```ruby require 'ox' xml = %{ Rock bottom } puts Ox.load(xml, mode: :hash) puts Ox.load(xml, mode: :hash_no_attrs) #{:top=>[{:name=>"sample"}, {:middle=>[{:name=>"second"}, {:bottom=>[{:name=>"third"}, "Rock bottom"]}]}]} #{:top=>{:middle=>{:bottom=>"Rock bottom"}}} ``` ### Object XML 
format The XML format used for Object encoding follows the structure of the Object. Each XML element is encoded so that the XML element name is a type indicator. Attributes of the element provide additional information such as the Class if relevant, the Object attribute name, and Object ID if necessary. The type indicator map is: - **a** => `Array` - **b** => `Base64` - only for legacy loads - **c** => `Class` - **f** => `Float` - **g** => `Regexp` - **h** => `Hash` - **i** => `Fixnum` - **j** => `Bignum` - **l** => `Rational` - **m** => `Symbol` - **n** => `FalseClass` - **o** => `Object` - **p** => `Ref` - **r** => `Range` - **s** => `String` - **t** => `Time` - **u** => `Struct` - **v** => `Complex` - **x** => `Raw` - **y** => `TrueClass` - **z** => `NilClass` If the type is an Object, type 'o' then an attribute named 'c' should be set with the full Class name including the Module names. If the XML element represents an Object then a sub-element is included for each attribute of the Object. An XML element attribute 'a' is set with a value that is the name of the Ruby Object attribute. In all cases, except for the Exception attribute hack, the attribute names begin with an @ character. (Exceptions are strange in that the attributes of the Exception Class are not named with an @ prefix. A hack since it has to be done in C and can not be done through the interpreter.) Values are encoded as the text portion of an element or in the sub-elements of the principal. For example, a Fixnum is encoded as: ```xml <i>123</i> ``` An Array has sub-elements and is encoded similar to this example. ```xml <a> <i>1</i> <s>abc</s> </a> ``` A Hash is encoded with an even number of elements where the first element is the key and the second is the value. This is repeated for each entry in the Hash. An example of the encoding of { 1 => 'one', 2 => 'two' } is: ```xml <h> <i>1</i> <s>one</s> <i>2</i> <s>two</s> </h> ``` Ox supports circular references where attributes of one Object can refer to an Object that refers back to the first Object. 
When this option is used an Object ID is added to each XML Object element as the value of the 'a' attribute. ox-2.11.0/CHANGELOG.md0000644000004100000410000003535613502763477014114 0ustar www-datawww-data# Changelog All changes to the Ox gem are documented here. Releases follow semantic versioning. ## [Unreleased] ## [2.11.0] - 2019-06-14 ### Changed - Ox::SyntaxError replaces SyntaxError where such an exception would have previously been raised. ### Fixed - File offsets when using the SAX parser now use `off_t`. Setting `-D_FILE_OFFSET_BITS=64` in the Makefile may allow 32 bit systems to access files larger than 2^32 in size. This has not been tested. ## [2.10.1] - 2019-05-27 ### Fixed - Remove extra space from doctype dump. ## [2.10.0] - 2018-08-26 ### Fixed - `:element_key_mod` and `:attr_key_mod` options were added to allow keys to be modified when loading. ## [2.9.4] - 2018-07-16 ### Fixed - Fixed issue with malformed object mode input. ## [2.9.3] - 2018-06-12 ### Fixed - Handle `\0` in dumped strings better. - No `\n` added on dumped if indent is less than zero. ## [2.9.2] - 2018-04-16 ### Fixed - `locate` fixed to cover a missing condition with named child thanks to mberlanda. ### Added - `locate` supports attribute exists searches thanks to mberlanda. ## [2.9.1] - 2018-04-14 ### Fixed - `prepend_child` added by mberlanda. ## [2.9.0] - 2018-03-13 ### Added - New builder methods for building HTML. - Examples added. ## [2.8.4] - 2018-03-4 ### Fixed - Commented out debug statement. ## [2.8.3] - 2018-03-3 ### Fixed - Attribute values now escape < and > on dump. ## [2.8.2] - 2017-11-1 ### Fixed - Fixed bug with SAX parser that caused a crash with very long invalid instruction element. - Fixed SAX parse error with double elements. ## [2.8.1] - 2017-10-27 ### Fixed - Avoid crash with invalid XML passed to Ox.parse_obj(). 
## [2.8.0] - 2017-09-22 ### Fixed - Added :skip_off mode to make sax callback on every none empty string even if there are not other non-whitespace characters present. ## [2.7.0] - 2017-08-18 ### Added - Two new load modes added, :hash and :hash_no_attrs. Both load an XML document to create a Hash populated with core Ruby objects. ### Fixed - Worked around Ruby API change for RSTRUCT_LEN so Ruby 2.4.2 does not crash. ## [2.6.0] - 2017-08-9 ### Added - The Element#each() method was added to allow iteration over Element nodes conditionally. - Element#locate() now supports a [@attr=value] specification. - An underscore character used in the easy API is now treated as a wild card for valid XML characters that are not valid for Ruby method names. ## [2.5.0] - 2017-05-4 ### Added - Added a :nest_ok option to SAX hints that will ignore the nested check on a tag to accomadate non-compliant HTML. ### Changed - Set the default for skip to be to skip white space. ## [2.4.13] - 2017-04-21 ### Fixed - Corrected Builder special character handling. ## [2.4.12] - 2017-04-11 ### Fixed - Fixed position in builder when encoding special characters. ## [2.4.11] - 2017-03-19 ### Fixed - Fixed SAX parser bug regarding upper case hints not matching. ## [2.4.10] - 2017-02-13 ### Fixed - Dump is now smarter about which characters to replace with &xxx; alternatives. ## [2.4.9] - 2017-01-25 ### Added - Added a SAX hint that allows comments to be treated like other elements. ## [2.4.8] - 2017-01-15 ### Changed - Tolerant mode now allows case-insensitve matches on elements during parsing. Smart mode in the SAX parser is also case insensitive. ## [2.4.7] - 2016-December-25 ### Fixed - After encountering a <> the SAX parser will continue parsing after reporting an error. ## [2.4.6] - 2016-11-28 ### Added - Added margin option to dump. ## [2.4.5] - 2016-09-11 ### Fixed - Thanks to GUI for fixing an infinite loop in Ox::Builder. 
## [2.4.4] - 2016-08-9 ### Fixed - Builder element attributes with special characters are now encoded correctly. - A newline at end of an XML string is now controlled by the indent value. A value of -1 indicates no terminating newline character and an indentation of zero. ## [2.4.3] - 2016-06-26 ### Fixed - Fixed compiler warnings and errors. - Updated for Ruby 2.4.0. ## [2.4.2] - 2016-06-23 ### Fixed - Added methods to Ox::Builder to provide output position information. ## [2.4.1] - 2016-04-30 ### Added - Added overlay feature to give control over which elements generate callbacks with the SAX parser. - Element.locate now includes self if the path is relative and starts with a wildcard. ### Fixed - Made SAX a little smarter or rather let it handle unquoted strings with a / at the end. - Fixed bug with reporting errors of element names that are too long. ## [2.4.0] - 2016-04-14 ### Fixed - Added Ox::Builder that constructs an XML string or writes XML to a stream using builder methods. ## [2.3.0] - 2016-02-21 ### Added - Added Ox::Element.replace_text() method. - An invalid_replace option has been added. It will replace invalid XML characters with a provided string. Strict effort now raises an exception if an invalid character is encountered on dump or load. ### Changed - Ox.load and Ox.parse now allow for a callback block to handle multiple top level entities in the input. - The Ox SAX parser now supports strings as input directly without an IO wrapper. ### Fixed - Ox::Element nodes variable is now always initialized to an empty Array. - Ox::Element attributes variable is now always initialized to an empty Hash. ## [2.2.4] - 2016-02-4 ### Fixed - Changed the code to allow compilation on older compilers. No change in functionality otherwise. ## [2.2.3] - 2015-December-31 ### Fixed - The convert_special option now applies to attributes as well as elements in the SAX parser. - The convert_special option now applies to the regular parser as well as the SAX parser. 
- Updated to work correctly with Ruby 2.3.0. ## [2.2.2] - 2015-10-19 ### Fixed - Fixed problem with detecting invalid special character sequences. - Fixed bug that caused a crash when an <> was encountered with the SAX parser. ## [2.2.1] - 2015-07-30 ### Fixed - Added support to handle script elements in html. - Added support for position from start for the sax parser. ## [2.2.0] - 2015-04-20 ### Fixed - Added the SAX convert_special option to the default options. - Added the SAX smart option to the default options. - Other SAX options are now taken from the defaults if not specified. ## [2.1.8] - 2015-02-10 ### Fixed - Fixed a bug that caused all input to be read before parsing with the sax parser and an IO.pipe. ## [2.1.7] - 2015-01-31 ### Fixed - Empty elements such as are now called back with empty text. - Fixed GC problem that occurs with the new GC in Ruby 2.2 that garbage collects Symbols. ## [2.1.6] - 2014-December-31 ### Fixed - Update licenses. No other changes. ## [2.1.5] - 2014-December-30 ### Fixed - Fixed symbol intern problem with Ruby 2.2.0. Symbols are not dynamic unless rb_intern(). There does not seem to be a way to force symbols created with encoding to be pinned. ## [2.1.4] - 2014-December-5 ### Fixed - Fixed bug where the parser always started at the first position in a stringio instead of the current position. ## [2.1.3] - 2014-07-25 ### Fixed - Added check for @attributes being nil. Reported by and proposed fix by Elana. ## [2.1.2] - 2014-07-17 ### Fixed - Added skip option to parsing. This allows white space to be collapsed in two different ways. - Added respond_to? method for easy access method checking. ## [2.1.1] - 2014-02-12 ### Fixed - Worked around a module reset and clear that occurs on some Rubies. ## [2.1.0] - 2014-02-2 ### Fixed - Thanks to jfontan Ox now includes support for XMLRPC. ## [2.0.12] - 2013-05-21 ### Fixed - Fixed problem compiling with latest version of Rubinius. 
## [2.0.11] - 2013-10-17 ### Fixed - Added support for BigDecimals in :object mode. ## [2.0.10] ### Fixed - Small fix to not create an empty element from a closed element when using locate(). - Fixed to keep objects from being garbage collected in Ruby 2.x. ## [2.0.9] - 2013-09-2 ### Fixed - Fixed bug that did not allow ISO-8859-1 characters and caused a crash. ## [2.0.8] - 2013-08-6 ### Fixed - Allow single quoted strings in all modes. ## [2.0.7] - 2013-08-4 ### Fixed - Fixed DOCTYPE parsing to handle nested '>' characters. ## [2.0.6] - 2013-07-23 ### Fixed - Fixed bug in special character decoding that chopped off text. - Limit depth on dump to 1000 to avoid core dump on circular references if the user does not specify circular. - Handles dumping non-string values for attributes correctly by converting the value to a string. ## [2.0.5] - 2013-07-5 ### Fixed - Better support for special character encoding with 1.8.7.- February 8, 2013 ## [2.0.4] - 2013-06-24 ### Fixed - Fixed SAX parser handling of &#nnnn; encoded characters. ## [2.0.3] - 2013-06-12 ### Fixed - Fixed excessive memory allocation issue for very large file parsing (half a gig). ## [2.0.2] - 2013-06-7 ### Fixed - Fixed buffer sliding window off by 1 error in the SAX parser. ## [2.0.1] ### Fixed - Added an attrs_done callback to the sax parser that will be called when all attributes for an element have been read. - Fixed bug in SAX parser where raising an exception in the handler routines would not cleanup. The test put together by griffinmyers was a huge help. - Reduced stack use in several places to improve fiber support. - Changed exception handling to assure proper cleanup with new stack minimizing. ## [2.0.0] - 2013-04-16 ### Fixed - The SAX parser went through a significant re-write. The options have changed. It is now 15% faster on large files and much better at recovering from errors. So much so that the tolerant option was removed and is now the default and only behavior. 
A smart option was added however. The smart option recognizes a file as an HTML file and will apply a simple set of validation rules that allow the HTML to be parsed more reasonably. Errors will cause callbacks but the parsing continues with the best guess as to how to recover. Rubymaniac has helped with testing and prompted the rewrite to support parsing HTML pages. - HTML is now supported with the SAX parser. The parser knows some tags like \ or \ do not have to be closed. Other hints as to how to parse and when to raise errors are also included. The parser does it's best to continue parsing even after errors. - Added symbolize option to the sax parser. This option, if set to false will use strings instead of symbols for element and attribute names. - A contrib directory was added for people to submit useful bits of code that can be used with Ox. The first contributor is Notezen with a nice way of building XML. ## [1.9.4] - 2013-03-24 ### Fixed - SAX tolerant mode handle multiple elements in a document better. ## [1.9.3] - 2013-03-22 ### Fixed - mcarpenter fixed a compile problem with Cygwin. - Now more tolerant when the :effort is set to :tolerant. Ox will let all sorts of errors typical in HTML documents pass. The result may not be perfect but at least parsed results are returned. - Attribute values need not be quoted or they can be quoted with single quotes or there can be no =value are all. - Elements not terminated will be terminated by the next element termination. This effect goes up until a match is found on the element name. - SAX parser also given a :tolerant option with the same tolerance as the string parser. ## [1.9.2] - 2013-03-9 ### Fixed - Fixed bug in the sax element name check that cause a memory write error. ## [1.9.1] - 2013-02-27 ### Fixed - Fixed the line numbers to be the start of the elements in the sax parser. ## [1.9.0] - 2013-02-25 ### Fixed - Added a new feature to Ox::Element.locate() that allows filtering by node Class. 
- Added feature to the Sax parser. If @line is defined in the handler it is set to the line number of the xml file before making callbacks. The same goes for @column but it is updated with the column. ## [1.8.9] - 2013-02-21 ### Fixed - Fixed bug in element start and end name checking. ## [1.8.8] - 2013-02-17 ### Fixed - Fixed bug in check for open and close element names matching. ## [1.8.7] ### Fixed - Added a correct check for element open and close names. - Changed raised Exceptions to custom classes that inherit from StandardError. - Fixed a few minor bugs. ## [1.8.6] - 2013-02-7 ### Fixed - Removed broken check for matching start and end element names in SAX mode. The names are still included in the handler callbacks so the user can perform the check if desired. ## [1.8.5] - 2013-02-3 ### Fixed - added encoding support for JRuby where possible when in 1.9 mode. ## [1.8.4] - 2013-01-25 ### Fixed - Applied patch by mcarpenter to fix solaris issues with build and remaining undefined @nodes. ## [1.8.3] - 2013-01-24 ### Fixed - Sax parser now honors encoding specification in the xml prolog correctly. ## [1.8.2] - 2013-01-18 ### Fixed - Ox::Element.locate no longer raises an exception if there are no child nodes. - Dumping an XML document no longer puts a carriage return after processing instructions. ## [1.8.1] - 2012-December-17 ### Fixed - Fixed bug that caused a crash when an invalid xml with two elements and no was parsed. (issue #28) - Modified the SAX parser to not strip white space from the start of string content. ## [1.8.0] - 2012-December-11 ### Fixed - Added more complete support for processing instructions in both the generic parser and in the sax parser. This change includes an additional sax handler callback for the end of the instruction processing. ## [1.7.1] - 2012-December-6 ### Fixed - Pulled in sharpyfox's changes to make Ox work with Windows. (issue #24) - Fixed bug that ignored white space only text elements. 
(issue #26) ## [1.7.0] - 2012-11-27 ### Fixed - Added support for BOM in the SAX parser. ## [1.6.9] - 2012-11-25 ### Fixed - Added support for BOM. They are honored for and handled correctly for UTF-8. Others cause encoding issues with Ruby or raise an error as others are not ASCII compatible.. ## [1.6.8] - 2012-11-18 ### Fixed - Changed extconf.rb to use RUBY_PLATFORM. ## [1.6.7] - 2012-11-15 ### Fixed - Now uses the encoding of the imput XML as the default encoding for the parsed output if the default options encoding is not set and the encoding is not set in the XML file prolog. ## [1.6.5] - 2012-10-25 ### Fixed - Special character handling now supports UCS-2 and UCS-4 Unicode characters as well as UTF-8 characters. ## [1.6.4] - 2012-10-24 ### Fixed - Special character handling has been improved. Both hex and base 10 numeric values are allowed up to a 64 bit number for really long UTF-8 characters. ## [1.6.3] - 2012-10-22 ### Fixed - Fixed compatibility issues with Linux (Ubuntu) mostly related to pointer sizes. ## [1.6.2] - 2012-10-7 ### Fixed - Added check for Solaris and Linux builds to not use the timezone member of time struct (struct tm). ## [1.6.1] - 2012-10-7 ### Fixed - Added check for Solaris builds to not use the timezone member of time struct (struct tm). ox-2.11.0/LICENSE0000644000004100000410000000206513502763477013277 0ustar www-datawww-dataThe MIT License (MIT) Copyright (c) 2012 Peter Ohler Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.ox-2.11.0/lib/0000755000004100000410000000000013502763477013035 5ustar www-datawww-dataox-2.11.0/lib/ox/0000755000004100000410000000000013502763477013463 5ustar www-datawww-dataox-2.11.0/lib/ox/comment.rb0000644000004100000410000000047013502763477015453 0ustar www-datawww-data module Ox # Comments represent XML comments in an XML document. A comment has a value # attribute only. class Comment < Node # Creates a new Comment with the specified value. # - +value+ [String] string value for the comment def initialize(value) super end end # Comment end # Ox ox-2.11.0/lib/ox/version.rb0000644000004100000410000000010713502763477015473 0ustar www-datawww-data module Ox # Current version of the module. VERSION = '2.11.0' end ox-2.11.0/lib/ox/doctype.rb0000644000004100000410000000046313502763477015462 0ustar www-datawww-data module Ox # Represents a DOCTYPE in an XML document. class DocType < Node # Creates a DOCTYPE elements with the content as a string specified in the # value parameter. # - +value+ [String] string value for the element def initialize(value) super end end # DocType end # Ox ox-2.11.0/lib/ox/document.rb0000644000004100000410000000203213502763477015623 0ustar www-datawww-data module Ox # Represents an XML document. It has a fixed set of attributes which form # the XML prolog. A Document includes Elements. class Document < Element # Create a new Document. 
# - +prolog+ [Hash] prolog attributes # - _:version_ [String] version, typically '1.0' or '1.1' # - _:encoding_ [String] encoding for the document, currently included but ignored # - _:standalone_ [String] indicates the document is standalone def initialize(prolog={}) super(nil) @attributes = { } @attributes[:version] = prolog[:version] unless prolog[:version].nil? @attributes[:encoding] = prolog[:encoding] unless prolog[:encoding].nil? @attributes[:standalone] = prolog[:standalone] unless prolog[:standalone].nil? end # Returns the first Element in the document. def root() unless !instance_variable_defined?(:@nodes) || @nodes.nil? @nodes.each do |n| return n if n.is_a?(::Ox::Element) end end nil end end # Document end # Ox ox-2.11.0/lib/ox/instruct.rb0000644000004100000410000000237213502763477015667 0ustar www-datawww-data module Ox # An Instruct represents a processing instruction of an XML document. It has a target, attributes, and a value or # content. The content will be all characters with the exception of the target. If the content follows a regular # attribute format then the attributes will be set to the parsed values. If it does not follow the attribute formate # then the attributes will be empty. class Instruct < Node include HasAttrs # The content of the processing instruction. attr_accessor :content # Creates a new Instruct with the specified name. # - +name+ [String] name of the Instruct def initialize(name) super @attributes = nil @content = nil end alias target value # Returns true if this Object and other are of the same type and have the # equivalent value and the equivalent elements otherwise false is returned. # - +other+ [Object] Object compare _self_ to. # *return* [Boolean] true if both Objects are equivalent, otherwise false. def eql?(other) return false unless super(other) return false unless self.attributes == other.attributes return false unless self.content == other.content true end alias == eql? 
end # Instruct end # Ox ox-2.11.0/lib/ox/error.rb0000644000004100000410000000127013502763477015141 0ustar www-datawww-data module Ox # Base error class for Ox errors. class Error < StandardError end # Error # An Exception that is raised as a result of a parse error while parsing a XML document. class ParseError < Error end # ParseError # An Exception that is raised as a result of an invalid argument. class ArgError < Error end # ArgError # An Exception that is raised as a result of invalid XML syntax. class SyntaxError < Error end # An Exception raised if a path is not valid. class InvalidPath < Error # Create a new instance with the +path+ specified. def initialize(path) super("#{path.join('/')} is not a valid location.") end end # InvalidPath end # Ox ox-2.11.0/lib/ox/cdata.rb0000644000004100000410000000036413502763477015067 0ustar www-datawww-data module Ox # CData represents a CDATA element in an XML document. class CData < Node # Creates a CDATA element. # - +value+ [String] value for the CDATA contents def initialize(value) super end end # CData end # Ox ox-2.11.0/lib/ox/node.rb0000644000004100000410000000125113502763477014734 0ustar www-datawww-data module Ox # The Node is the base class for all other in the Ox module. class Node # String value associated with the Node. attr_accessor :value # Creates a new Node with the specified String value. # - +value+ [String] string value for the Node def initialize(value) @value = value.to_s end # Returns true if this Object and other are of the same type and have the # equivalent value otherwise false is returned. # - +other+ [Object] Object to compare _self_ to. def eql?(other) return false if (other.nil? or self.class != other.class) other.value == self.value end alias == eql? end # Node end # Ox ox-2.11.0/lib/ox/element.rb0000644000004100000410000004112713502763477015446 0ustar www-datawww-data module Ox # An Element represents a element of an XML document. It has a name, # attributes, and sub-nodes. 
# # To access the child elements or attributes there are several options. One # is to walk the nodes and attributes. Another is to use the locate() # method. The easiest for simple regularly formatted XML is to reference the # sub elements or attributes simply by name. Repeating elements with the # same name can be referenced with an element count as well. A few examples # should explain the 'easy' API more clearly. # # *Example* # # doc = Ox.parse(%{ # # # # Peter # Ohler # # # Makie # Ohler # # # }) # # doc.People.Person.given.text # => "Peter" # doc.People.Person(1).given.text # => "Makie" # doc.People.Person.age # => "58" class Element < Node include HasAttrs # Creates a new Element with the specified name. # - +name+ [String] name of the Element def initialize(name) super @attributes = {} @nodes = [] end alias name value # Returns the Element's nodes array. These are the sub-elements of this # Element. # *return* [Array] all child Nodes. def nodes @nodes = [] if !instance_variable_defined?(:@nodes) or @nodes.nil? @nodes end # Appends a Node to the Element's nodes array. Returns the element itself # so multiple appends can be chained together. # - +node+ [Node] Node to append to the nodes array def <<(node) raise "argument to << must be a String or Ox::Node." unless node.is_a?(String) or node.is_a?(Node) @nodes = [] if !instance_variable_defined?(:@nodes) or @nodes.nil? @nodes << node self end # Prepend a Node to the Element's nodes array. Returns the element itself # so multiple appends can be chained together. # - +node+ [Node] Node to prepend to the nodes array def prepend_child(node) raise "argument to << must be a String or Ox::Node." unless node.is_a?(String) or node.is_a?(Node) @nodes = [] if !instance_variable_defined?(:@nodes) or @nodes.nil? @nodes.unshift(node) self end # Returns true if this Object and other are of the same type and have the # equivalent value and the equivalent elements otherwise false is returned. 
# - +other+ [Object] Object compare _self_ to. # *return* [Boolean] true if both Objects are equivalent, otherwise false. def eql?(other) return false unless super(other) return false unless self.attributes == other.attributes return false unless self.nodes == other.nodes true end alias == eql? # Returns the first String in the elements nodes array or nil if there is # no String node. def text() nodes.each { |n| return n if n.is_a?(String) } nil end # Clears any child nodes of an element and replaces those with a single Text # (String) node. Note the existing nodes array is modified and not replaced. # - +txt+ [String] to become the only element of the nodes array def replace_text(txt) raise "the argument to replace_text() must be a String" unless txt.is_a?(String) @nodes.clear() @nodes << txt end # Return true if all the key-value pairs in the cond Hash match the # @attributes key-values. def attr_match(cond) cond.each_pair { |k,v| return false unless v == @attributes[k.to_sym] || v == @attributes[k.to_s] } true end # Iterate over each child of the instance yielding according to the cond # argument value. If the cond argument is nil then all child nodes are # yielded to. If cond is a string then only the child Elements with a # matching name will be yielded to. If the cond is a Hash then the # keys-value pairs in the cond must match the child attribute values with # the same keys. Any other cond type will yield to nothing. def each(cond=nil) if cond.nil? nodes.each { |n| yield(n) } else cond = cond.to_s if cond.is_a?(Symbol) if cond.is_a?(String) nodes.each { |n| yield(n) if n.is_a?(Element) && cond == n.name } elsif cond.is_a?(Hash) nodes.each { |n| yield(n) if n.is_a?(Element) && n.attr_match(cond) } end end end # Returns an array of Nodes or Strings that correspond to the locations # specified by the path parameter. The path parameter describes the path # to the return values which can be either nodes in the XML or # attributes. 
# The path is a relative description. There are similarities
    # between the locate() method and XPath but locate does not follow the
    # same rules as XPath. The syntax is meant to be simpler and more Ruby
    # like.
    #
    # Like XPath the path delimiters are the slash (/) character. The path is
    # split on the delimiter and each element of the path then describes the
    # child of the current Element to traverse.
    #
    # Attributes are specified with an @ prefix.
    #
    # Each element name in the path can be followed by a bracket expression
    # that narrows the paths to traverse. Supported expressions are numbers
    # with a preceding qualifier. Qualifiers are -, +, <, and >. The +
    # qualifier is the default. A - qualifier indicates the index begins at
    # the end of the children just like for Ruby Arrays. The < and >
    # qualifiers indicate all elements either less than or greater than
    # should be matched. Note that the element index starts at 0, similar to
    # Ruby but contrary to XPath.
    #
    # Element names can also be wildcard characters. A * indicates any descendant should be followed. A ? indicates any
    # single Element can match the wildcard. A ^ character followed by the name of a Class will match any node of the
    # specified class. Valid class names are Element, Comment, String (or Text), CData, DocType.
    #
    # Examples are:
    # * <code>element.locate("Family/Pete/*")</code> returns all children of the Pete Element.
    # * <code>element.locate("Family/?[1]")</code> returns the second element in the Family Element (indexes start at 0).
    # * <code>element.locate("Family/?[<3]")</code> returns the first 3 elements in the Family Element.
    # * <code>element.locate("Family/?[@age]")</code> returns the elements with an age attribute defined in the Family Element.
    # * <code>element.locate("Family/Kid[@age]")</code> returns the Kid elements with an age attribute defined in the Family Element.
    # * <code>element.locate("Family/?[@age=32]")</code> returns the elements with an age attribute equal to 32 in the Family Element.
# * <code>element.locate("Family/Kid[@age=32]")</code> returns the Kid elements with an age attribute equal to 32 in the Family Element.
    # * <code>element.locate("Family/?/@age")</code> returns the age attribute for each child in the Family Element.
    # * <code>element.locate("Family/*/@type")</code> returns the type attribute value for descendants of the Family.
    # * <code>element.locate("Family/^Comment")</code> returns any comments that are a child of Family.
    #
    # - +path+ [String] path to the Nodes to locate
    # *return* [Array] the matching Nodes, Strings, or attribute values;
    # [self] when path is nil.
    def locate(path)
      return [self] if path.nil?
      found = []
      pa = path.split('/')
      # start_with? is used instead of path[0] because some Rubies return a
      # Fixnum from String#[], which would never equal '*'.
      if path.start_with?('*')
        # a bit of a hack but it allows self to be checked as well
        e = Element.new('')
        e << self
        e.alocate(pa, found)
      else
        alocate(pa, found)
      end
      found
    end

    # Removes the given nodes from this Element's children and from the
    # children of any descendant Element. Nodes are matched by object
    # identity (object_id), not by value. nil arguments are ignored.
    #
    # Examples are:
    # * <code>element.remove_children(child)</code> removes child if it is a descendant of the element.
    # * <code>element.remove_children(child1, child2)</code> removes both nodes if they are descendants of the element.
    #
    # - +children+ [Array] the Ox::Node or String objects to remove
    # *return* [Element] self
    def remove_children(*children)
      targets = children.compact
      return self if targets.empty?
      recursive_children_removal(targets.map { |c| c.object_id })
      self
    end

    # Remove all the children matching the path provided. The path uses the
    # same syntax as locate(). A nil path removes nothing.
    #
    # Examples are:
    # * <code>element.remove_children_by_path("*")</code> removes all children.
    # * <code>element.remove_children_by_path("Family/Kid[@age=32]")</code> removes the Kid elements with an age attribute equal to 32 in the Family Element.
    #
    # - +path+ [String] path to the Nodes to remove
    # *return* [Element] self
    def remove_children_by_path(path)
      del_locate(path.split('/')) unless path.nil?
      self
    end

    # Handles the 'easy' API that allows navigating a simple XML by
    # referencing elements and attributes by name.
# - +id+ [Symbol] element or attribute name # *return* [Element|Node|String|nil] the element, attribute value, or Node identifed by the name # # _raise_ [NoMethodError] if no match is found def method_missing(id, *args, &block) has_some = false ids = id.to_s i = args[0].to_i # will be 0 if no arg or parsing fails nodes.each do |n| if (n.is_a?(Element) || n.is_a?(Instruct)) && (n.value == id || n.value == ids || name_matchs?(n.value, ids)) return n if 0 == i has_some = true i -= 1 end end if instance_variable_defined?(:@attributes) return @attributes[id] if @attributes.has_key?(id) return @attributes[ids] if @attributes.has_key?(ids) end return nil if has_some raise NoMethodError.new("#{ids} not found", name) end # - +id+ [String|Symbol] identifer of the attribute or method # - +ignored+ inc_all [Boolean] # *return* true if the element has a member that matches the provided name. def respond_to?(id, inc_all=false) return true if super id_str = id.to_s id_sym = id.to_sym nodes.each do |n| next if n.is_a?(String) return true if n.value == id_str || n.value == id_sym || name_matchs?(n.value, id_str) end if instance_variable_defined?(:@attributes) && !@attributes.nil? return true if @attributes.has_key?(id_str) return true if @attributes.has_key?(id_sym) end false end # - +path+ [Array] array of steps in a path # - +found+ [Array] matching nodes def alocate(path, found) step = path[0] if step.start_with?('@') # attribute raise InvalidPath.new(path) unless 1 == path.size if instance_variable_defined?(:@attributes) step = step[1..-1] sym_step = step.to_sym @attributes.each do |k,v| found << v if ('?' == step or k == step or k == sym_step) end end else # element name if (i = step.index('[')).nil? # just name name = step qual = nil else name = step[0..i-1] raise InvalidPath.new(path) unless step.end_with?(']') i += 1 qual = step[i..i] # step[i] would be better but some rubies (jruby, ree, rbx) take that as a Fixnum. 
if '0' <= qual and qual <= '9' qual = '+' else i += 1 end index = step[i..-2].to_i end if '?' == name or '*' == name match = nodes elsif '^' == name[0..0] # 1.8.7 thinks name[0] is a fixnum case name[1..-1] when 'Element' match = nodes.select { |e| e.is_a?(Element) } when 'String', 'Text' match = nodes.select { |e| e.is_a?(String) } when 'Comment' match = nodes.select { |e| e.is_a?(Comment) } when 'CData' match = nodes.select { |e| e.is_a?(CData) } when 'DocType' match = nodes.select { |e| e.is_a?(DocType) } else #puts "*** no match on #{name}" match = [] end else match = nodes.select { |e| e.is_a?(Element) and name == e.name } end unless qual.nil? or match.empty? case qual when '+' match = index < match.size ? [match[index]] : [] when '-' match = index <= match.size ? [match[-index]] : [] when '<' match = 0 < index ? match[0..index - 1] : [] when '>' match = index <= match.size ? match[index + 1..-1] : [] when '@' k,v = step[i..-2].split('=') if v match = match.select { |n| n.is_a?(Element) && (v == n.attributes[k.to_sym] || v == n.attributes[k]) } else match = match.select { |n| n.is_a?(Element) && (n.attributes[k.to_sym] || n.attributes[k]) } end else raise InvalidPath.new(path) end end if (1 == path.size) match.each { |n| found << n } elsif '*' == name match.each { |n| n.alocate(path, found) if n.is_a?(Element) } match.each { |n| n.alocate(path[1..-1], found) if n.is_a?(Element) } else match.each { |n| n.alocate(path[1..-1], found) if n.is_a?(Element) } end end end # - +path+ [Array] array of steps in a path def del_locate(path) step = path[0] if step.start_with?('@') # attribute raise InvalidPath.new(path) unless 1 == path.size if instance_variable_defined?(:@attributes) step = step[1..-1] sym_step = step.to_sym @attributes.delete_if { |k,v| '?' == step || k.to_sym == sym_step } end else # element name if (i = step.index('[')).nil? 
# just name name = step qual = nil else name = step[0..i-1] raise InvalidPath.new(path) unless step.end_with?(']') i += 1 qual = step[i..i] # step[i] would be better but some rubies (jruby, ree, rbx) take that as a Fixnum. if '0' <= qual and qual <= '9' qual = '+' else i += 1 end index = step[i..-2].to_i end if '?' == name or '*' == name match = nodes elsif '^' == name[0..0] # 1.8.7 thinks name[0] is a fixnum case name[1..-1] when 'Element' match = nodes.select { |e| e.is_a?(Element) } when 'String', 'Text' match = nodes.select { |e| e.is_a?(String) } when 'Comment' match = nodes.select { |e| e.is_a?(Comment) } when 'CData' match = nodes.select { |e| e.is_a?(CData) } when 'DocType' match = nodes.select { |e| e.is_a?(DocType) } else #puts "*** no match on #{name}" match = [] end else match = nodes.select { |e| e.is_a?(Element) and name == e.name } end unless qual.nil? or match.empty? case qual when '+' match = index < match.size ? [match[index]] : [] when '-' match = index <= match.size ? [match[-index]] : [] when '<' match = 0 < index ? match[0..index - 1] : [] when '>' match = index <= match.size ? match[index + 1..-1] : [] when '@' k,v = step[i..-2].split('=') if v match = match.select { |n| n.is_a?(Element) && (v == n.attributes[k.to_sym] || v == n.attributes[k]) } else match = match.select { |n| n.is_a?(Element) && (n.attributes[k.to_sym] || n.attributes[k]) } end else raise InvalidPath.new(path) end end if (1 == path.size) nodes.delete_if { |n| match.include?(n) } elsif '*' == name match.each { |n| n.del_locate(path) if n.is_a?(Element) } match.each { |n| n.del_locate(path[1..-1]) if n.is_a?(Element) } else match.each { |n| n.del_locate(path[1..-1]) if n.is_a?(Element) } end end end private # Removes recursively children for nodes and sub_nodes # # - +found+ [Array] An array of Ox::Element def recursive_children_removal(found) return if found.empty? 
nodes.tap do |ns| # found.delete(n.object_id) stops looking for an already found object_id ns.delete_if { |n| found.include?(n.object_id) ? found.delete(n.object_id) : false } nodes.each do |n| n.send(:recursive_children_removal, found) if n.is_a?(Ox::Element) end end end def name_matchs?(pat, id) return false unless pat.length == id.length pat.length.times { |i| return false unless '_' == id[i] || pat[i] == id[i] } true end end # Element end # Ox ox-2.11.0/lib/ox/raw.rb0000644000004100000410000000057413502763477014607 0ustar www-datawww-data module Ox # Raw elements are used to inject existing XML strings into a document # WARNING: Use of this feature can result in invalid XML, since `value` is # injected as-is. class Raw < Node # Creates a new Raw element with the specified value. # - +value+ [String] string value for the comment def initialize(value) super end end # Raw end # Ox ox-2.11.0/lib/ox/bag.rb0000644000004100000410000000673713502763477014556 0ustar www-datawww-data module Ox # A generic class that is used only for storing attributes. It is the base # Class for auto-generated classes in the storage system. Instance variables # are added using the instance_variable_set() method. All instance variables # can be accessed using the variable name (without the @ prefix). No setters # are provided as the Class is intended for reading only. class Bag # The initializer can take multiple arguments in the form of key values # where the key is the variable name and the value is the variable # value. This is intended for testing purposes only. # - +args+ [Hash] instance variable symbols and their values # # *Example* # # Ox::Bag.new(:@x => 42, :@y => 57) # def initialize(args={ }) args.each do |k,v| self.instance_variable_set(k, v) end end # Replaces the Object.respond_to?() method. # - +m+ [Symbol] method symbol # *return* [Boolean] true for any method that matches an instance variable # reader, otherwise false. 
def respond_to?(m) return true if super at_m = ('@' + m.to_s).to_sym instance_variables.include?(at_m) end # Handles requests for variable values. Others cause an Exception to be # raised. # - +m+ (Symbol) method symbol # *return* [Boolean] the value of the specified instance variable. # # _raise_ [ArgumentError] if an argument is given. Zero arguments expected. # # _raise_ [NoMethodError] if the instance variable is not defined. def method_missing(m, *args, &block) raise ArgumentError.new("wrong number of arguments (#{args.size} for 0) to method #{m}") unless args.nil? or args.empty? at_m = ('@' + m.to_s).to_sym raise NoMethodError.new("undefined method #{m}", m) unless instance_variable_defined?(at_m) instance_variable_get(at_m) end # Replaces eql?() with something more reasonable for this Class. # - +other+ [Object] Object to compare self to # *return* [Boolean] true if each variable and value are the same, otherwise false. def eql?(other) return false if (other.nil? or self.class != other.class) ova = other.instance_variables iv = instance_variables return false if ova.size != iv.size iv.each do |vid| return false if instance_variable_get(vid) != other.instance_variable_get(vid) end true end alias == eql? # Define a new class based on the Ox::Bag class. This is used internally in # the Ox module and is available to service wrappers that receive XML # requests that include Objects of Classes not defined in the storage # process. # - +classname+ (String) Class name or symbol that includes Module names. # *return* [Object] an instance of the specified Class. # # _raise_ [NameError] if the classname is invalid. def self.define_class(classname) classname = classname.to_s unless classname.is_a?(String) tokens = classname.split('::').map { |n| n.to_sym } raise NameError.new("Invalid classname '#{classname}") if tokens.empty? 
m = Object tokens[0..-2].each do |sym| if m.const_defined?(sym) m = m.const_get(sym) else c = Module.new m.const_set(sym, c) m = c end end sym = tokens[-1] if m.const_defined?(sym) c = m.const_get(sym) else c = Class.new(Ox::Bag) m.const_set(sym, c) end c end end # Bag end # Ox ox-2.11.0/lib/ox/sax.rb0000644000004100000410000000563313502763477014612 0ustar www-datawww-data module Ox # A SAX style parse handler. The Ox::Sax handler class should be subclasses # and then used with the Ox.sax_parse() method. The Sax methods will then be # called as the file is parsed. This is best suited for very large files or # IO streams.

# # *Example* # # require 'ox' # # class MySax < ::Ox::Sax # def initialize() # @element_names = [] # end # # def start_element(name) # @element_names << name # end # end # # any = MySax.new() # File.open('any.xml', 'r') do |f| # Ox.sax_parse(any, f) # end # # To make the desired methods active while parsing the desired method should # be made public in the subclasses. If the methods remain private they will # not be called during parsing. The 'name' argument in the callback methods # will be a Symbol. The 'str' arguments will be a String. The 'value' # arguments will be Ox::Sax::Value objects. Since both the text() and the # value() methods are called for the same element in the XML document the the # text() method is ignored if the value() method is defined or public. The # same is true for attr() and attr_value(). When all attributes have been read # the attr_done() callback will be invoked. # # def instruct(target); end # def end_instruct(target); end # def attr(name, str); end # def attr_value(name, value); end # def attrs_done(); end # def doctype(str); end # def comment(str); end # def cdata(str); end # def text(str); end # def value(value); end # def start_element(name); end # def end_element(name); end # def error(message, line, column); end # def abort(name); end # # Initializing _line_ attribute in the initializer will cause that variable to # be updated before each callback with the XML line number. The same is true # for the _column_ attribute but it will be updated with the column in the XML # file that is the start of the element or node just read. @pos if defined # will hold the number of bytes from the start of the document. class Sax # Create a new instance of the Sax handler class. def initialize() #@pos = nil #@line = nil #@column = nil end # To make the desired methods active while parsing the desired method # should be made public in the subclasses. If the methods remain private # they will not be called during parsing. 
private def instruct(target) end def end_instruct(target) end def attr(name, str) end def attr_value(name, value) end def attrs_done() end def doctype(str) end def comment(str) end def cdata(str) end def text(str) end def value(value) end def start_element(name) end def end_element(name) end def error(message, line, column) end def abort(name) end end # Sax end # Ox ox-2.11.0/lib/ox/hasattrs.rb0000644000004100000410000000365113502763477015646 0ustar www-datawww-data module Ox # An Object that includes the HasAttrs module can have attributes which are a Hash of String values and either String # or Symbol keys. # # To access the attributes there are several options. One is to walk the attributes. The easiest for simple regularly # formatted XML is to reference the attributes simply by name. module HasAttrs # Returns all the attributes of the Instruct as a Hash. # *return* [Hash] all attributes and attribute values. def attributes @attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil? @attributes end # Returns the value of an attribute. # - +attr+ [Symbol|String] attribute name or key to return the value for def [](attr) return nil unless instance_variable_defined?(:@attributes) and @attributes.is_a?(Hash) @attributes[attr] or (attr.is_a?(String) ? @attributes[attr.to_sym] : @attributes[attr.to_s]) end # Adds or set an attribute of the Instruct. # - +attr+ [Symbol|String] attribute name or key # - +value+ [Object] value for the attribute def []=(attr, value) raise "argument to [] must be a Symbol or a String." unless attr.is_a?(Symbol) or attr.is_a?(String) @attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil? @attributes[attr] = value.to_s end # Handles the 'easy' API that allows navigating a simple XML by # referencing attributes by name. 
# - +id+ [Symbol] element or attribute name # *return* [String|nil] the attribute value # _raise_ [NoMethodError] if no match is found def method_missing(id, *args, &block) ids = id.to_s if instance_variable_defined?(:@attributes) return @attributes[id] if @attributes.has_key?(id) return @attributes[ids] if @attributes.has_key?(ids) end raise NoMethodError.new("#{ids} not found", name) end end # HasAttrs end # Ox ox-2.11.0/lib/ox/xmlrpc_adapter.rb0000644000004100000410000000166113502763477017021 0ustar www-datawww-data require 'ox' module Ox # This is an alternative parser for the stdlib xmlrpc library. It makes # use of Ox and is based on REXMLStreamParser. To use it set is as the # parser for an XMLRPC client: # # require 'xmlrpc/client' # require 'ox/xmlrpc_adapter' # client = XMLRPC::Client.new2('http://some_server/rpc') # client.set_parser(Ox::StreamParser.new) class StreamParser < XMLRPC::XMLParser::AbstractStreamParser # Create a new instance. def initialize @parser_class = OxParser end # The SAX wrapper. class OxParser < Ox::Sax include XMLRPC::XMLParser::StreamParserMixin alias :text :character alias :end_element :endElement alias :start_element :startElement # Initiates the sax parser with the provided string. def parse(str) Ox.sax_parse(self, StringIO.new(str), :symbolize => false, :convert_special => true) end end end end ox-2.11.0/lib/ox.rb0000644000004100000410000000336113502763477014013 0ustar www-datawww-data# Copyright (c) 2011, Peter Ohler
# All rights reserved. # # === Description: # # Ox handles XML documents in two ways. It is a generic XML parser and writer as # well as a fast Object / XML marshaller. Ox was written for speed as a # replacement for Nokogiri and for Marshal. # # As an XML parser it is 2 or more times faster than Nokogiri and as a generic # XML writer it is 14 times faster than Nokogiri. Of course different files may # result in slightly different times. # # As an Object serializer Ox is 4 times faster than the standard Ruby # Marshal.dump(). Ox is 3 times faster than Marshal.load(). # # === Object Dump Sample: # # require 'ox' # # class Sample # attr_accessor :a, :b, :c # # def initialize(a, b, c) # @a = a # @b = b # @c = c # end # end # # # Create Object # obj = Sample.new(1, "bee", ['x', :y, 7.0]) # # Now dump the Object to an XML String. # xml = Ox.dump(obj) # # Convert the object back into a Sample Object. # obj2 = Ox.parse_obj(xml) # # === Generic XML Writing and Parsing: # # require 'ox' # # doc = Ox::Document.new(:version => '1.0') # # top = Ox::Element.new('top') # top[:name] = 'sample' # doc << top # # mid = Ox::Element.new('middle') # mid[:name] = 'second' # top << mid # # bot = Ox::Element.new('bottom') # bot[:name] = 'third' # mid << bot # # xml = Ox.dump(doc) # puts xml # doc2 = Ox.parse(xml) # puts "Same? 
#{doc == doc2}" module Ox end require 'ox/version' require 'ox/error' require 'ox/hasattrs' require 'ox/node' require 'ox/comment' require 'ox/raw' require 'ox/instruct' require 'ox/cdata' require 'ox/doctype' require 'ox/element' require 'ox/document' require 'ox/bag' require 'ox/sax' require 'ox/ox' # C extension ox-2.11.0/ox.gemspec0000644000004100000410000000525313502763477014267 0ustar www-datawww-data######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- # stub: ox 2.11.0 ruby libext # stub: ext/ox/extconf.rb Gem::Specification.new do |s| s.name = "ox".freeze s.version = "2.11.0" s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version= s.require_paths = ["lib".freeze, "ext".freeze] s.authors = ["Peter Ohler".freeze] s.date = "2019-06-14" s.description = "A fast XML parser and object serializer that uses only standard C lib.\n \nOptimized XML (Ox), as the name implies was written to provide speed optimized\nXML handling. It was designed to be an alternative to Nokogiri and other Ruby\nXML parsers for generic XML parsing and as an alternative to Marshal for Object\nserialization. 
".freeze s.email = "peter@ohler.com".freeze s.extensions = ["ext/ox/extconf.rb".freeze] s.extra_rdoc_files = ["CHANGELOG.md".freeze, "README.md".freeze] s.files = ["CHANGELOG.md".freeze, "LICENSE".freeze, "README.md".freeze, "ext/ox/attr.h".freeze, "ext/ox/base64.c".freeze, "ext/ox/base64.h".freeze, "ext/ox/buf.h".freeze, "ext/ox/builder.c".freeze, "ext/ox/cache.c".freeze, "ext/ox/cache.h".freeze, "ext/ox/cache8.c".freeze, "ext/ox/cache8.h".freeze, "ext/ox/dump.c".freeze, "ext/ox/encode.h".freeze, "ext/ox/err.c".freeze, "ext/ox/err.h".freeze, "ext/ox/extconf.rb".freeze, "ext/ox/gen_load.c".freeze, "ext/ox/hash_load.c".freeze, "ext/ox/helper.h".freeze, "ext/ox/obj_load.c".freeze, "ext/ox/ox.c".freeze, "ext/ox/ox.h".freeze, "ext/ox/parse.c".freeze, "ext/ox/sax.c".freeze, "ext/ox/sax.h".freeze, "ext/ox/sax_as.c".freeze, "ext/ox/sax_buf.c".freeze, "ext/ox/sax_buf.h".freeze, "ext/ox/sax_has.h".freeze, "ext/ox/sax_hint.c".freeze, "ext/ox/sax_hint.h".freeze, "ext/ox/sax_stack.h".freeze, "ext/ox/special.c".freeze, "ext/ox/special.h".freeze, "ext/ox/type.h".freeze, "lib/ox.rb".freeze, "lib/ox/bag.rb".freeze, "lib/ox/cdata.rb".freeze, "lib/ox/comment.rb".freeze, "lib/ox/doctype.rb".freeze, "lib/ox/document.rb".freeze, "lib/ox/element.rb".freeze, "lib/ox/error.rb".freeze, "lib/ox/hasattrs.rb".freeze, "lib/ox/instruct.rb".freeze, "lib/ox/node.rb".freeze, "lib/ox/raw.rb".freeze, "lib/ox/sax.rb".freeze, "lib/ox/version.rb".freeze, "lib/ox/xmlrpc_adapter.rb".freeze] s.homepage = "http://www.ohler.com/ox".freeze s.licenses = ["MIT".freeze] s.rdoc_options = ["--main".freeze, "README.md".freeze, "--title".freeze, "Ox Documentation".freeze, "--exclude".freeze, "extconf.rb".freeze] s.rubygems_version = "2.5.2.1".freeze s.summary = "A fast XML parser and object serializer.".freeze end ox-2.11.0/ext/0000755000004100000410000000000013502763477013067 5ustar www-datawww-dataox-2.11.0/ext/ox/0000755000004100000410000000000013502763477013515 5ustar 
www-datawww-dataox-2.11.0/ext/ox/obj_load.c0000644000004100000410000005571613502763477015450 0ustar www-datawww-data/* obj_load.c * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #include #include #include #include #include #include #include "ruby.h" #include "base64.h" #include "ox.h" static void instruct(PInfo pi, const char *target, Attr attrs, const char *content); static void add_text(PInfo pi, char *text, int closed); static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren); static void end_element(PInfo pi, const char *ename); static VALUE parse_time(const char *text, VALUE clas); static VALUE parse_xsd_time(const char *text, VALUE clas); static VALUE parse_double_time(const char *text, VALUE clas); static VALUE parse_regexp(const char *text); static ID get_var_sym_from_attrs(Attr a, void *encoding); static VALUE get_obj_from_attrs(Attr a, PInfo pi, VALUE base_class); static VALUE get_class_from_attrs(Attr a, PInfo pi, VALUE base_class); static VALUE classname2class(const char *name, PInfo pi, VALUE base_class); static unsigned long get_id_from_attrs(PInfo pi, Attr a); static CircArray circ_array_new(void); static void circ_array_free(CircArray ca); static void circ_array_set(CircArray ca, VALUE obj, unsigned long id); static VALUE circ_array_get(CircArray ca, unsigned long id); static void debug_stack(PInfo pi, const char *comment); static void fill_indent(PInfo pi, char *buf, size_t size); struct _parseCallbacks _ox_obj_callbacks = { instruct, /* instruct, */ 0, /* add_doctype, */ 0, /* add_comment, */ 0, /* add_cdata, */ add_text, add_element, end_element, NULL, }; ParseCallbacks ox_obj_callbacks = &_ox_obj_callbacks; extern ParseCallbacks ox_gen_callbacks; inline static VALUE str2sym(const char *str, void *encoding) { VALUE sym; #ifdef HAVE_RUBY_ENCODING_H if (0 != encoding) { VALUE rstr = rb_str_new2(str); rb_enc_associate(rstr, (rb_encoding*)encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = 
ID2SYM(rb_intern(str));
    }
#else
    sym = ID2SYM(rb_intern(str));
#endif
    return sym;
}

/* Maps an attribute name to an ID.
 *
 * A name that starts with a digit is converted with atoi() and the
 * INT2NUM() result is stored in the returned ID variable. Any other name is
 * looked up in ox_attr_cache; on a miss it is interned (via String#to_sym
 * when an encoding is given) and the result is written to the cache slot.
 */
inline static ID
name2var(const char *name, void *encoding) {
    VALUE *slot;
    ID var_id = 0;

    if ('0' <= *name && *name <= '9') {
        var_id = INT2NUM(atoi(name));
    } else if (Qundef == (var_id = ox_cache_get(ox_attr_cache, name, &slot, 0))) {
#ifdef HAVE_RUBY_ENCODING_H
        if (0 != encoding) {
            volatile VALUE rstr = rb_str_new2(name);
            volatile VALUE sym;

            rb_enc_associate(rstr, (rb_encoding*)encoding);
            sym = rb_funcall(rstr, ox_to_sym_id, 0);
            // Needed for Ruby 2.2 to get around the GC of symbols
            // created with to_sym which is needed for encoded symbols.
            rb_ary_push(ox_sym_bank, sym);
            var_id = SYM2ID(sym);
        } else {
            var_id = rb_intern(name);
        }
#else
        var_id = rb_intern(name);
#endif
        *slot = var_id;
    }
    return var_id;
}

/* Looks up the constant class_name directly under mod.
 *
 * What happens when the constant is missing depends on effort:
 * TolerantEffort returns Qundef, AutoEffort defines a new class under mod
 * that inherits from base_class, and StrictEffort (and any other value)
 * calls rb_const_get_at() unconditionally so Ruby raises.
 */
inline static VALUE
resolve_classname(VALUE mod, const char *class_name, Effort effort, VALUE base_class) {
    VALUE clas;
    ID ci = rb_intern(class_name);

    switch (effort) {
    case TolerantEffort:
        if (rb_const_defined_at(mod, ci)) {
            clas = rb_const_get_at(mod, ci);
        } else {
            clas = Qundef;
        }
        break;
    case AutoEffort:
        if (rb_const_defined_at(mod, ci)) {
            clas = rb_const_get_at(mod, ci);
        } else {
            clas = rb_define_class_under(mod, class_name, base_class);
        }
        break;
    case StrictEffort:
    default:
        /* raise an error if name is not defined */
        clas = rb_const_get_at(mod, ci);
        break;
    }
    return clas;
}

/* Allocates (without initializing) an instance of the class called name.
 * Returns Qnil when the class could not be resolved.
 */
inline static VALUE
classname2obj(const char *name, PInfo pi, VALUE base_class) {
    VALUE clas = classname2class(name, pi, base_class);

    if (Qundef == clas) {
        return Qnil;
    } else {
        return rb_obj_alloc(clas);
    }
}

#if HAS_RSTRUCT
/* Creates an instance of the Struct class called name. Everything up to and
 * including the first ':' and the character after it is skipped; a name
 * without ':' is used whole. The constant is looked up under ox_struct_class.
 */
inline static VALUE
structname2obj(const char *name) {
    VALUE ost;
    const char *s = name;

    for (; 1; s++) {
        if ('\0' == *s) {
            s = name;
            break;
        } else if (':' == *s) {
            s += 2;
            break;
        }
    }
    ost = rb_const_get(ox_struct_class, rb_intern(s));
    /* use encoding as the indicator for Ruby 1.8.7 or 1.9.x */
#if HAS_ENCODING_SUPPORT
    return rb_struct_alloc_noinit(ost);
#elif HAS_PRIVATE_ENCODING
    return
rb_struct_alloc_noinit(ost); #else return rb_struct_new(ost); #endif } #endif inline static VALUE parse_ulong(const char *s, PInfo pi) { unsigned long n = 0; for (; '\0' != *s; s++) { if ('0' <= *s && *s <= '9') { n = n * 10 + (*s - '0'); } else { set_error(&pi->err, "Invalid number for a julian day", pi->str, pi->s); return Qundef; } } return ULONG2NUM(n); } /* 2010-07-09T10:47:45.895826162+09:00 */ inline static VALUE parse_time(const char *text, VALUE clas) { VALUE t; if (Qnil == (t = parse_double_time(text, clas)) && Qnil == (t = parse_xsd_time(text, clas))) { VALUE args[1]; *args = rb_str_new2(text); t = rb_funcall2(ox_time_class, ox_parse_id, 1, args); } return t; } static VALUE classname2class(const char *name, PInfo pi, VALUE base_class) { VALUE *slot; VALUE clas; if (Qundef == (clas = ox_cache_get(ox_class_cache, name, &slot, 0))) { char class_name[1024]; char *s; const char *n = name; clas = rb_cObject; for (s = class_name; '\0' != *n; n++) { if (':' == *n) { *s = '\0'; n++; if (':' != *n) { set_error(&pi->err, "Invalid classname, expected another ':'", pi->str, pi->s); return Qundef; } if (Qundef == (clas = resolve_classname(clas, class_name, pi->options->effort, base_class))) { return Qundef; } s = class_name; } else { *s++ = *n; } } *s = '\0'; if (Qundef != (clas = resolve_classname(clas, class_name, pi->options->effort, base_class))) { *slot = clas; } } return clas; } static ID get_var_sym_from_attrs(Attr a, void *encoding) { for (; 0 != a->name; a++) { if ('a' == *a->name && '\0' == *(a->name + 1)) { return name2var(a->value, encoding); } } return 0; } static VALUE get_obj_from_attrs(Attr a, PInfo pi, VALUE base_class) { for (; 0 != a->name; a++) { if ('c' == *a->name && '\0' == *(a->name + 1)) { return classname2obj(a->value, pi, base_class); } } return Qundef; } #if HAS_RSTRUCT static VALUE get_struct_from_attrs(Attr a) { for (; 0 != a->name; a++) { if ('c' == *a->name && '\0' == *(a->name + 1)) { return structname2obj(a->value); } } return Qundef; 
} #endif static VALUE get_class_from_attrs(Attr a, PInfo pi, VALUE base_class) { for (; 0 != a->name; a++) { if ('c' == *a->name && '\0' == *(a->name + 1)) { return classname2class(a->value, pi, base_class); } } return Qundef; } static unsigned long get_id_from_attrs(PInfo pi, Attr a) { for (; 0 != a->name; a++) { if ('i' == *a->name && '\0' == *(a->name + 1)) { unsigned long id = 0; const char *text = a->value; char c; for (; '\0' != *text; text++) { c = *text; if ('0' <= c && c <= '9') { id = id * 10 + (c - '0'); } else { set_error(&pi->err, "bad number format", pi->str, pi->s); return 0; } } return id; } } return 0; } static CircArray circ_array_new() { CircArray ca; ca = ALLOC(struct _circArray); ca->objs = ca->obj_array; ca->size = sizeof(ca->obj_array) / sizeof(VALUE); ca->cnt = 0; return ca; } static void circ_array_free(CircArray ca) { if (ca->objs != ca->obj_array) { xfree(ca->objs); } xfree(ca); } static void circ_array_set(CircArray ca, VALUE obj, unsigned long id) { if (0 < id) { unsigned long i; if (ca->size < id) { unsigned long cnt = id + 512; if (ca->objs == ca->obj_array) { ca->objs = ALLOC_N(VALUE, cnt); memcpy(ca->objs, ca->obj_array, sizeof(VALUE) * ca->cnt); } else { REALLOC_N(ca->objs, VALUE, cnt); } ca->size = cnt; } id--; for (i = ca->cnt; i < id; i++) { ca->objs[i] = Qundef; } ca->objs[id] = obj; if (ca->cnt <= id) { ca->cnt = id + 1; } } } static VALUE circ_array_get(CircArray ca, unsigned long id) { VALUE obj = Qundef; if (id <= ca->cnt) { obj = ca->objs[id - 1]; } return obj; } static VALUE parse_regexp(const char *text) { const char *te; int options = 0; te = text + strlen(text) - 1; #if HAS_ONIG for (; text < te && '/' != *te; te--) { switch (*te) { case 'i': options |= ONIG_OPTION_IGNORECASE; break; case 'm': options |= ONIG_OPTION_MULTILINE; break; case 'x': options |= ONIG_OPTION_EXTEND; break; default: break; } } #endif return rb_reg_new(text + 1, te - text - 1, options); } static void instruct(PInfo pi, const char *target, Attr 
attrs, const char *content) { if (0 == strcmp("xml", target)) { #if HAS_ENCODING_SUPPORT for (; 0 != attrs->name; attrs++) { if (0 == strcmp("encoding", attrs->name)) { pi->options->rb_enc = rb_enc_find(attrs->value); } } #elif HAS_PRIVATE_ENCODING for (; 0 != attrs->name; attrs++) { if (0 == strcmp("encoding", attrs->name)) { pi->options->rb_enc = rb_str_new2(attrs->value); } } #endif } } static void add_text(PInfo pi, char *text, int closed) { Helper h = helper_stack_peek(&pi->helpers); if (!closed) { set_error(&pi->err, "Text not closed", pi->str, pi->s); return; } if (0 == h) { set_error(&pi->err, "Unexpected text", pi->str, pi->s); return; } if (DEBUG <= pi->options->trace) { char indent[128]; fill_indent(pi, indent, sizeof(indent)); printf("%s '%s' to type %c\n", indent, text, h->type); } switch (h->type) { case NoCode: case StringCode: h->obj = rb_str_new2(text); #if HAS_ENCODING_SUPPORT if (0 != pi->options->rb_enc) { rb_enc_associate(h->obj, pi->options->rb_enc); } #elif HAS_PRIVATE_ENCODING if (Qnil != pi->options->rb_enc) { rb_funcall(h->obj, ox_force_encoding_id, 1, pi->options->rb_enc); } #endif if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, (unsigned long)pi->id); } break; case FixnumCode: { long n = 0; char c; int neg = 0; if ('-' == *text) { neg = 1; text++; } for (; '\0' != *text; text++) { c = *text; if ('0' <= c && c <= '9') { n = n * 10 + (c - '0'); } else { set_error(&pi->err, "bad number format", pi->str, pi->s); return; } } if (neg) { n = -n; } h->obj = LONG2NUM(n); break; } case FloatCode: h->obj = rb_float_new(strtod(text, 0)); break; case SymbolCode: { VALUE sym; VALUE *slot; if (Qundef == (sym = ox_cache_get(ox_symbol_cache, text, &slot, 0))) { sym = str2sym(text, (void*)pi->options->rb_enc); // Needed for Ruby 2.2 to get around the GC of symbols created with // to_sym which is needed for encoded symbols. 
rb_ary_push(ox_sym_bank, sym); *slot = sym; } h->obj = sym; break; } case DateCode: { VALUE args[1]; if (Qundef == (*args = parse_ulong(text, pi))) { return; } h->obj = rb_funcall2(ox_date_class, ox_jd_id, 1, args); break; } case TimeCode: h->obj = parse_time(text, ox_time_class); break; case String64Code: { unsigned long str_size = b64_orig_size(text); VALUE v; char *str = ALLOCA_N(char, str_size + 1); from_base64(text, (uchar*)str); v = rb_str_new(str, str_size); #if HAS_ENCODING_SUPPORT if (0 != pi->options->rb_enc) { rb_enc_associate(v, pi->options->rb_enc); } #elif HAS_PRIVATE_ENCODING if (0 != pi->options->rb_enc) { rb_funcall(v, ox_force_encoding_id, 1, pi->options->rb_enc); } #endif if (0 != pi->circ_array) { circ_array_set(pi->circ_array, v, (unsigned long)h->obj); } h->obj = v; break; } case Symbol64Code: { VALUE sym; VALUE *slot; unsigned long str_size = b64_orig_size(text); char *str = ALLOCA_N(char, str_size + 1); from_base64(text, (uchar*)str); if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, 0))) { sym = str2sym(str, (void*)pi->options->rb_enc); // Needed for Ruby 2.2 to get around the GC of symbols created with // to_sym which is needed for encoded symbols. 
rb_ary_push(ox_sym_bank, sym); *slot = sym; } h->obj = sym; break; } case RegexpCode: if ('/' == *text) { h->obj = parse_regexp(text); } else { unsigned long str_size = b64_orig_size(text); char *str = ALLOCA_N(char, str_size + 1); from_base64(text, (uchar*)str); h->obj = parse_regexp(str); } break; case BignumCode: h->obj = rb_cstr_to_inum(text, 10, 1); break; case BigDecimalCode: #if HAS_BIGDECIMAL h->obj = rb_funcall(rb_cObject, ox_bigdecimal_id, 1, rb_str_new2(text)); #else h->obj = Qnil; #endif break; default: h->obj = Qnil; break; } } static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) { Attr a; Helper h; unsigned long id; if (TRACE <= pi->options->trace) { char buf[1024]; char indent[128]; char *s = buf; char *end = buf + sizeof(buf) - 2; s += snprintf(s, end - s, " <%s%s", (hasChildren) ? "" : "/", ename); for (a = attrs; 0 != a->name; a++) { s += snprintf(s, end - s, " %s=%s", a->name, a->value); } *s++ = '>'; *s++ = '\0'; if (DEBUG <= pi->options->trace) { printf("===== add element stack(%d) =====\n", helper_stack_depth(&pi->helpers)); debug_stack(pi, buf); } else { fill_indent(pi, indent, sizeof(indent)); printf("%s%s\n", indent, buf); } } if (helper_stack_empty(&pi->helpers)) { /* top level object */ if (0 != (id = get_id_from_attrs(pi, attrs))) { pi->circ_array = circ_array_new(); } } if ('\0' != ename[1]) { set_error(&pi->err, "Invalid element name", pi->str, pi->s); return; } h = helper_stack_push(&pi->helpers, get_var_sym_from_attrs(attrs, (void*)pi->options->rb_enc), Qundef, *ename); switch (h->type) { case NilClassCode: h->obj = Qnil; break; case TrueClassCode: h->obj = Qtrue; break; case FalseClassCode: h->obj = Qfalse; break; case StringCode: /* h->obj will be replaced by add_text if it is called */ h->obj = ox_empty_string; if (0 != pi->circ_array) { pi->id = get_id_from_attrs(pi, attrs); circ_array_set(pi->circ_array, h->obj, pi->id); } break; case FixnumCode: case FloatCode: case SymbolCode: case Symbol64Code: 
case RegexpCode: case BignumCode: case BigDecimalCode: case ComplexCode: case DateCode: case TimeCode: case RationalCode: /* sub elements read next */ /* value will be read in the following add_text */ h->obj = Qundef; break; case String64Code: h->obj = Qundef; if (0 != pi->circ_array) { pi->id = get_id_from_attrs(pi, attrs); } break; case ArrayCode: h->obj = rb_ary_new(); if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } break; case HashCode: h->obj = rb_hash_new(); if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } break; case RangeCode: h->obj = rb_range_new(ox_zero_fixnum, ox_zero_fixnum, Qfalse); break; case RawCode: if (hasChildren) { h->obj = ox_parse(pi->s, pi->end - pi->s, ox_gen_callbacks, &pi->s, pi->options, &pi->err); if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } } else { h->obj = Qnil; } break; case ExceptionCode: if (Qundef == (h->obj = get_obj_from_attrs(attrs, pi, rb_eException))) { return; } if (0 != pi->circ_array && Qnil != h->obj) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } break; case ObjectCode: if (Qundef == (h->obj = get_obj_from_attrs(attrs, pi, ox_bag_clas))) { return; } if (0 != pi->circ_array && Qnil != h->obj) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } break; case StructCode: #if HAS_RSTRUCT h->obj = get_struct_from_attrs(attrs); if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } #else set_error(&pi->err, "Ruby structs not supported with this verion of Ruby", pi->str, pi->s); return; #endif break; case ClassCode: if (Qundef == (h->obj = get_class_from_attrs(attrs, pi, ox_bag_clas))) { return; } break; case RefCode: h->obj = Qundef; if (0 != pi->circ_array) { h->obj = circ_array_get(pi->circ_array, get_id_from_attrs(pi, attrs)); } if (Qundef == h->obj) { set_error(&pi->err, "Invalid 
circular reference", pi->str, pi->s); return; } break; default: set_error(&pi->err, "Invalid element name", pi->str, pi->s); return; break; } if (DEBUG <= pi->options->trace) { debug_stack(pi, " -----------"); } } static void end_element(PInfo pi, const char *ename) { if (TRACE <= pi->options->trace) { char indent[128]; if (DEBUG <= pi->options->trace) { char buf[1024]; printf("===== end element stack(%d) =====\n", helper_stack_depth(&pi->helpers)); snprintf(buf, sizeof(buf) - 1, "", ename); debug_stack(pi, buf); } else { fill_indent(pi, indent, sizeof(indent)); printf("%s\n", indent, ename); } } if (!helper_stack_empty(&pi->helpers)) { Helper h = helper_stack_pop(&pi->helpers); Helper ph = helper_stack_peek(&pi->helpers); if (ox_empty_string == h->obj) { /* special catch for empty strings */ h->obj = rb_str_new2(""); } if (Qundef == h->obj) { set_error(&pi->err, "Invalid element for object mode", pi->str, pi->s); return; } pi->obj = h->obj; if (0 != ph) { switch (ph->type) { case ArrayCode: rb_ary_push(ph->obj, h->obj); break; case ExceptionCode: case ObjectCode: if (Qnil != ph->obj) { if (0 == h->var) { set_error(&pi->err, "Invalid element for object mode", pi->str, pi->s); return; } rb_ivar_set(ph->obj, h->var, h->obj); } break; case StructCode: #if HAS_RSTRUCT if (0 == h->var) { set_error(&pi->err, "Invalid element for object mode", pi->str, pi->s); return; } rb_struct_aset(ph->obj, h->var, h->obj); #else set_error(&pi->err, "Ruby structs not supported with this verion of Ruby", pi->str, pi->s); return; #endif break; case HashCode: // put back h helper_stack_push(&pi->helpers, h->var, h->obj, KeyCode); break; case RangeCode: #if HAS_RSTRUCT if (ox_beg_id == h->var) { RSTRUCT_SET(ph->obj, 0, h->obj); } else if (ox_end_id == h->var) { RSTRUCT_SET(ph->obj, 1, h->obj); } else if (ox_excl_id == h->var) { RSTRUCT_SET(ph->obj, 2, h->obj); } else { set_error(&pi->err, "Invalid range attribute", pi->str, pi->s); return; } #else set_error(&pi->err, "Ruby structs not 
supported with this verion of Ruby", pi->str, pi->s); return; #endif break; case KeyCode: { Helper gh; helper_stack_pop(&pi->helpers); if (NULL == (gh = helper_stack_peek(&pi->helpers)) || Qundef == ph->obj || Qundef == h->obj) { set_error(&pi->err, "Corrupt parse stack, container is wrong type", pi->str, pi->s); return; } rb_hash_aset(gh->obj, ph->obj, h->obj); } break; case ComplexCode: #ifdef T_COMPLEX if (Qundef == ph->obj) { ph->obj = h->obj; } else { ph->obj = rb_complex_new(ph->obj, h->obj); } #else set_error(&pi->err, "Complex Objects not implemented in Ruby 1.8.7", pi->str, pi->s); return; #endif break; case RationalCode: { if (Qundef == h->obj || RUBY_T_FIXNUM != rb_type(h->obj)) { set_error(&pi->err, "Invalid object format", pi->str, pi->s); return; } #ifdef T_RATIONAL if (Qundef == ph->obj) { ph->obj = h->obj; } else { if (Qundef == ph->obj || RUBY_T_FIXNUM != rb_type(h->obj)) { set_error(&pi->err, "Corrupt parse stack, container is wrong type", pi->str, pi->s); return; } #ifdef RUBINIUS_RUBY ph->obj = rb_Rational(ph->obj, h->obj); #else ph->obj = rb_rational_new(ph->obj, h->obj); #endif } #else set_error(&pi->err, "Rational Objects not implemented in Ruby 1.8.7", pi->str, pi->s); return; #endif break; } default: set_error(&pi->err, "Corrupt parse stack, container is wrong type", pi->str, pi->s); return; break; } } } if (0 != pi->circ_array && helper_stack_empty(&pi->helpers)) { circ_array_free(pi->circ_array); pi->circ_array = 0; } if (DEBUG <= pi->options->trace) { debug_stack(pi, " ----------"); } } static VALUE parse_double_time(const char *text, VALUE clas) { long v = 0; long v2 = 0; const char *dot = 0; char c; for (; '.' 
!= *text; text++) { c = *text; if (c < '0' || '9' < c) { return Qnil; } v = 10 * v + (long)(c - '0'); } dot = text++; for (; '\0' != *text && text - dot <= 6; text++) { c = *text; if (c < '0' || '9' < c) { return Qnil; } v2 = 10 * v2 + (long)(c - '0'); } for (; text - dot <= 9; text++) { v2 *= 10; } #if HAS_NANO_TIME return rb_time_nano_new(v, v2); #else return rb_time_new(v, v2 / 1000); #endif } typedef struct _tp { int cnt; char end; char alt; } *Tp; static VALUE parse_xsd_time(const char *text, VALUE clas) { long cargs[10]; long *cp = cargs; long v; int i; char c; struct _tp tpa[10] = { { 4, '-', '-' }, { 2, '-', '-' }, { 2, 'T', 'T' }, { 2, ':', ':' }, { 2, ':', ':' }, { 2, '.', '.' }, { 9, '+', '-' }, { 2, ':', ':' }, { 2, '\0', '\0' }, { 0, '\0', '\0' } }; Tp tp = tpa; struct tm tm; for (; 0 != tp->cnt; tp++) { for (i = tp->cnt, v = 0; 0 < i ; text++, i--) { c = *text; if (c < '0' || '9' < c) { if (tp->end == c || tp->alt == c) { break; } return Qnil; } v = 10 * v + (long)(c - '0'); } c = *text++; if (tp->end != c && tp->alt != c) { return Qnil; } *cp++ = v; } tm.tm_year = (int)cargs[0] - 1900; tm.tm_mon = (int)cargs[1] - 1; tm.tm_mday = (int)cargs[2]; tm.tm_hour = (int)cargs[3]; tm.tm_min = (int)cargs[4]; tm.tm_sec = (int)cargs[5]; #if HAS_NANO_TIME return rb_time_nano_new(mktime(&tm), cargs[6]); #else return rb_time_new(mktime(&tm), cargs[6] / 1000); #endif } /* debug functions */ static void fill_indent(PInfo pi, char *buf, size_t size) { size_t cnt; if (0 < (cnt = helper_stack_depth(&pi->helpers))) { cnt *= 2; if (size < cnt + 1) { cnt = size - 1; } memset(buf, ' ', cnt); buf += cnt; } *buf = '\0'; } static void debug_stack(PInfo pi, const char *comment) { char indent[128]; Helper h; fill_indent(pi, indent, sizeof(indent)); printf("%s%s\n", indent, comment); if (!helper_stack_empty(&pi->helpers)) { for (h = pi->helpers.head; h < pi->helpers.tail; h++) { const char *clas = "---"; const char *key = "---"; if (Qundef != h->obj) { VALUE c = 
rb_obj_class(h->obj); clas = rb_class2name(c); } if (0 != h->var) { if (HashCode == h->type) { VALUE v; v = rb_funcall2(h->var, rb_intern("to_s"), 0, 0); key = StringValuePtr(v); } else if (ObjectCode == (h - 1)->type || ExceptionCode == (h - 1)->type || RangeCode == (h - 1)->type || StructCode == (h - 1)->type) { key = rb_id2name(h->var); } else { printf("%s*** corrupt stack ***\n", indent); } } printf("%s [%c] %s : %s\n", indent, h->type, clas, key); } } } ox-2.11.0/ext/ox/sax_stack.h0000644000004100000410000000344213502763477015651 0ustar www-datawww-data/* sax_stack.h * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #ifndef OX_SAX_STACK_H #define OX_SAX_STACK_H #include "sax_hint.h" #define STACK_INC 32 typedef struct _nv { const char *name; VALUE val; int childCnt; Hint hint; } *Nv; typedef struct _nStack { struct _nv base[STACK_INC]; Nv head; /* current stack */ Nv end; /* stack end */ Nv tail; /* pointer to one past last element name on stack */ } *NStack; inline static void stack_init(NStack stack) { stack->head = stack->base; stack->end = stack->base + sizeof(stack->base) / sizeof(struct _nv); stack->tail = stack->head; } inline static int stack_empty(NStack stack) { return (stack->head == stack->tail); } inline static void stack_cleanup(NStack stack) { if (stack->base != stack->head) { xfree(stack->head); } } inline static void stack_push(NStack stack, const char *name, VALUE val, Hint hint) { if (stack->end <= stack->tail) { size_t len = stack->end - stack->head; size_t toff = stack->tail - stack->head; if (stack->base == stack->head) { stack->head = ALLOC_N(struct _nv, len + STACK_INC); memcpy(stack->head, stack->base, sizeof(struct _nv) * len); } else { REALLOC_N(stack->head, struct _nv, len + STACK_INC); } stack->tail = stack->head + toff; stack->end = stack->head + len + STACK_INC; } stack->tail->name = name; stack->tail->val = val; stack->tail->hint = hint; stack->tail->childCnt = 0; stack->tail++; } inline static Nv stack_peek(NStack 
stack) { if (stack->head < stack->tail) { return stack->tail - 1; } return 0; } inline static Nv stack_pop(NStack stack) { if (stack->head < stack->tail) { stack->tail--; return stack->tail; } return 0; } #endif /* OX_SAX_STACK_H */ ox-2.11.0/ext/ox/hash_load.c0000644000004100000410000001301713502763477015605 0ustar www-datawww-data/* hash_load.c * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #include #include #include #include #include #include #include "ruby.h" #include "ox.h" // The approach taken for the hash and has_no_attrs parsing is to push just // the key on to the stack and then decide what to do on the way up/out. static VALUE create_top(PInfo pi) { volatile VALUE top = rb_hash_new();; helper_stack_push(&pi->helpers, 0, top, HashCode); pi->obj = top; return top; } static void add_text(PInfo pi, char *text, int closed) { Helper parent = helper_stack_peek(&pi->helpers); volatile VALUE s = rb_str_new2(text); volatile VALUE a; #if HAS_ENCODING_SUPPORT if (0 != pi->options->rb_enc) { rb_enc_associate(s, pi->options->rb_enc); } #elif HAS_PRIVATE_ENCODING if (Qnil != pi->options->rb_enc) { rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc); } #endif switch (parent->type) { case NoCode: parent->obj = s; parent->type = StringCode; break; case ArrayCode: rb_ary_push(parent->obj, s); break; default: a = rb_ary_new(); rb_ary_push(a, parent->obj); rb_ary_push(a, s); parent->obj = a; parent->type = ArrayCode; break; } } static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) { if (helper_stack_empty(&pi->helpers)) { create_top(pi); } if (NULL != attrs && NULL != attrs->name) { volatile VALUE h = rb_hash_new(); volatile VALUE key; volatile VALUE val; volatile VALUE a; for (; 0 != attrs->name; attrs++) { if (Qnil != pi->options->attr_key_mod) { key = rb_funcall(pi->options->attr_key_mod, ox_call_id, 1, rb_str_new2(attrs->name)); } else if (Yes == pi->options->sym_keys) { key = rb_id2sym(rb_intern(attrs->name)); } else { 
key = rb_str_new2(attrs->name); } val = rb_str_new2(attrs->value); #if HAS_ENCODING_SUPPORT if (0 != pi->options->rb_enc) { rb_enc_associate(val, pi->options->rb_enc); } #elif HAS_PRIVATE_ENCODING if (Qnil != pi->options->rb_enc) { rb_funcall(val, ox_force_encoding_id, 1, pi->options->rb_enc); } #endif rb_hash_aset(h, key, val); } a = rb_ary_new(); rb_ary_push(a, h); rb_obj_taint(a); // flag indicating it is a unit, kind of a hack but it works helper_stack_push(&pi->helpers, rb_intern(ename), a, ArrayCode); } else { helper_stack_push(&pi->helpers, rb_intern(ename), Qnil, NoCode); } } static void add_element_no_attrs(PInfo pi, const char *ename, Attr attrs, int hasChildren) { if (helper_stack_empty(&pi->helpers)) { create_top(pi); } helper_stack_push(&pi->helpers, rb_intern(ename), Qnil, NoCode); } static int untaint_hash_cb(VALUE key, VALUE value, VALUE x) { if (Qtrue == rb_obj_tainted(value)) { rb_obj_untaint(value); } return ST_CONTINUE; } static void end_element_core(PInfo pi, const char *ename, bool check_taint) { Helper e = helper_stack_pop(&pi->helpers); Helper parent = helper_stack_peek(&pi->helpers); volatile VALUE pobj = parent->obj; volatile VALUE found = Qundef; volatile VALUE key; volatile VALUE a; if (NoCode == e->type) { e->obj = Qnil; } if (Qnil != pi->options->element_key_mod) { key = rb_funcall(pi->options->element_key_mod, ox_call_id, 1, rb_id2str(e->var)); } else if (Yes == pi->options->sym_keys) { key = rb_id2sym(e->var); } else { key = rb_id2str(e->var); } // Make sure the parent is a Hash. If not set then make a Hash. If an // Array or non-Hash then append to array or create and append. 
switch (parent->type) { case NoCode: pobj = rb_hash_new(); parent->obj = pobj; parent->type = HashCode; break; case ArrayCode: pobj = rb_hash_new(); rb_ary_push(parent->obj, pobj); break; case HashCode: found = rb_hash_lookup2(parent->obj, key, Qundef); break; default: a = rb_ary_new(); rb_ary_push(a, parent->obj); pobj = rb_hash_new(); rb_ary_push(a, pobj); parent->obj = a; parent->type = ArrayCode; break; } if (Qundef == found) { rb_hash_aset(pobj, key, e->obj); } else if (RUBY_T_ARRAY == rb_type(found)) { if (check_taint && Qtrue == rb_obj_tainted(found)) { rb_obj_untaint(found); a = rb_ary_new(); rb_ary_push(a, found); rb_ary_push(a, e->obj); rb_hash_aset(pobj, key, a); } else { rb_ary_push(found, e->obj); } } else { // something there other than an array if (check_taint && Qtrue == rb_obj_tainted(e->obj)) { rb_obj_untaint(e->obj); } a = rb_ary_new(); rb_ary_push(a, found); rb_ary_push(a, e->obj); rb_hash_aset(pobj, key, a); } if (check_taint && RUBY_T_HASH == rb_type(e->obj)) { rb_hash_foreach(e->obj, untaint_hash_cb, Qnil); } } static void end_element(PInfo pi, const char *ename) { end_element_core(pi, ename, true); } static void end_element_no_attrs(PInfo pi, const char *ename) { end_element_core(pi, ename, false); } static void finish(PInfo pi) { if (Qnil != pi->obj && RUBY_T_HASH == rb_type(pi->obj)) { rb_hash_foreach(pi->obj, untaint_hash_cb, Qnil); } } struct _parseCallbacks _ox_hash_callbacks = { NULL, NULL, NULL, NULL, add_text, add_element, end_element, finish, }; ParseCallbacks ox_hash_callbacks = &_ox_hash_callbacks; struct _parseCallbacks _ox_hash_no_attrs_callbacks = { NULL, NULL, NULL, NULL, add_text, add_element_no_attrs, end_element_no_attrs, NULL, }; ParseCallbacks ox_hash_no_attrs_callbacks = &_ox_hash_no_attrs_callbacks; ox-2.11.0/ext/ox/sax_buf.h0000644000004100000410000001011713502763477015315 0ustar www-datawww-data/* sax_buf.h * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #ifndef OX_SAX_BUF_H #define OX_SAX_BUF_H #include typedef struct _buf { char base[0x00001000]; char *head; char *end; char *tail; char *read_end; /* one past last character read */ char *pro; /* protection start, buffer can not slide past this point */ char *str; /* start of current string being read */ off_t pos; off_t line; off_t col; off_t pro_pos; off_t pro_line; off_t pro_col; int (*read_func)(struct _buf *buf); union { int fd; VALUE io; const char *str; } in; struct _saxDrive *dr; } *Buf; typedef struct _checkPt { off_t pro_dif; off_t pos; off_t line; off_t col; char c; } *CheckPt; #define CHECK_PT_INIT { -1, 0, 0, 0, '\0' } extern void ox_sax_buf_init(Buf buf, VALUE io); extern int ox_sax_buf_read(Buf buf); static inline char buf_get(Buf buf) { //printf("*** drive get from '%s' from start: %ld buf: %p from read_end: %ld\n", buf->tail, buf->tail - buf->head, buf->head, buf->read_end - buf->tail); if (buf->read_end <= buf->tail) { if (0 != ox_sax_buf_read(buf)) { return '\0'; } } if ('\n' == *buf->tail) { buf->line++; buf->col = 0; } else { buf->col++; } buf->pos++; return *buf->tail++; } static inline void buf_backup(Buf buf) { buf->tail--; buf->col--; buf->pos--; if (0 >= buf->col) { buf->line--; // allow col to be negative since we never backup twice in a row } } static inline void buf_protect(Buf buf) { buf->pro = buf->tail; buf->str = buf->tail; // can't have str before pro buf->pro_pos = buf->pos; buf->pro_line = buf->line; buf->pro_col = buf->col; } static inline void buf_reset(Buf buf) { buf->tail = buf->pro; buf->pos = buf->pro_pos; buf->line = buf->pro_line; buf->col = buf->pro_col; } /* Starts by reading a character so it is safe to use with an empty or * compacted buffer. 
*/ static inline char buf_next_non_white(Buf buf) { char c; while ('\0' != (c = buf_get(buf))) { switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': break; default: return c; } } return '\0'; } /* Starts by reading a character so it is safe to use with an empty or * compacted buffer. */ static inline char buf_next_white(Buf buf) { char c; while ('\0' != (c = buf_get(buf))) { switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': case '\0': return c; default: break; } } return '\0'; } static inline void buf_cleanup(Buf buf) { if (buf->base != buf->head && 0 != buf->head) { xfree(buf->head); buf->head = 0; } } static inline int is_white(char c) { switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': return 1; default: break; } return 0; } static inline void buf_checkpoint(Buf buf, CheckPt cp) { cp->pro_dif = (int)(buf->tail - buf->pro); cp->pos = buf->pos; cp->line = buf->line; cp->col = buf->col; cp->c = *(buf->tail - 1); } static inline int buf_checkset(CheckPt cp) { return (0 <= cp->pro_dif); } static inline char buf_checkback(Buf buf, CheckPt cp) { buf->tail = buf->pro + cp->pro_dif; buf->pos = cp->pos; buf->line = cp->line; buf->col = cp->col; return cp->c; } static inline void buf_collapse_return(char *str) { char *s = str; char *back = str; for (; '\0' != *s; s++) { if (back != str && '\n' == *s && '\r' == *(back - 1)) { *(back - 1) = '\n'; } else { *back++ = *s; } } *back = '\0'; } static inline void buf_collapse_white(char *str) { char *s = str; char *back = str; for (; '\0' != *s; s++) { switch(*s) { case ' ': case '\t': case '\f': case '\n': case '\r': if (back == str || ' ' != *(back - 1)) { *back++ = ' '; } break; default: *back++ = *s; break; } } *back = '\0'; } #endif /* OX_SAX_BUF_H */ ox-2.11.0/ext/ox/ox.h0000644000004100000410000001543213502763477014321 0ustar www-datawww-data/* ox.h * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #ifndef OX_H #define OX_H #if defined(__cplusplus) extern "C" { #if 0 } /* satisfy cc-mode */ #endif #endif #define RSTRING_NOT_MODIFIED #include "ruby.h" #if HAS_ENCODING_SUPPORT #include "ruby/encoding.h" #endif #ifdef RUBINIUS_RUBY #undef T_COMPLEX enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK}; #else #if HAS_TOP_LEVEL_ST_H /* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. */ #include "st.h" #else #include "ruby/st.h" #endif #endif #include "cache.h" #include "err.h" #include "type.h" #include "attr.h" #include "helper.h" #define raise_error(msg, xml, current) _ox_raise_error(msg, xml, current, __FILE__, __LINE__) #define MAX_TEXT_LEN 4096 #define SILENT 0 #define TRACE 1 #define DEBUG 2 #define XSD_DATE 0x0001 #define WITH_XML 0x0002 #define WITH_INST 0x0004 #define WITH_DTD 0x0008 #define CIRCULAR 0x0010 #define XSD_DATE_SET 0x0100 #define WITH_XML_SET 0x0200 #define WITH_INST_SET 0x0400 #define WITH_DTD_SET 0x0800 #define CIRCULAR_SET 0x1000 typedef enum { UseObj = 1, UseAttr = 2, UseAttrSet = 3, UseArray = 4, UseAMember = 5, UseHash = 6, UseHashKey = 7, UseHashVal = 8, UseRange = 9, UseRangeAttr= 10, UseRaw = 11, } Use; typedef enum { StrictEffort = 's', TolerantEffort = 't', AutoEffort = 'a', NoEffort = 0, } Effort; typedef enum { Yes = 'y', No = 'n', NotSet = 0 } YesNo; typedef enum { ObjMode = 'o', GenMode = 'g', LimMode = 'l', HashMode = 'h', HashNoAttrMode = 'n', NoMode = 0 } LoadMode; typedef enum { OffSkip = 'o', NoSkip = 'n', CrSkip = 'r', SpcSkip = 's', } SkipMode; typedef struct _pInfo *PInfo; typedef struct _parseCallbacks { void (*instruct)(PInfo pi, const char *target, Attr attrs, const char *content); void (*add_doctype)(PInfo pi, const char *docType); void (*add_comment)(PInfo pi, const char *comment); void (*add_cdata)(PInfo pi, const char *cdata, size_t len); void (*add_text)(PInfo pi, char *text, int closed); void (*add_element)(PInfo pi, const char 
*ename, Attr attrs, int hasChildren); void (*end_element)(PInfo pi, const char *ename); void (*finish)(PInfo pi); } *ParseCallbacks; typedef struct _circArray { VALUE obj_array[1024]; VALUE *objs; unsigned long size; /* allocated size or initial array size */ unsigned long cnt; } *CircArray; typedef struct _options { char encoding[64]; /* encoding, stored in the option to avoid GC invalidation in default values */ char margin[128]; /* left margin for dumping */ int indent; /* indention for dump, default 2 */ int trace; /* trace level */ char margin_len; /* margin length */ char with_dtd; /* YesNo */ char with_xml; /* YesNo */ char with_instruct; /* YesNo */ char circular; /* YesNo */ char xsd_date; /* YesNo */ char mode; /* LoadMode */ char effort; /* Effort */ char sym_keys; /* symbolize keys */ char skip; /* skip mode */ char smart; /* YesNo sax smart mode */ char convert_special;/* boolean true or false */ char allow_invalid; /* YesNo */ char inv_repl[12]; /* max 10 valid characters, first character is the length */ char strip_ns[64]; /* namespace to strip, \0 is no-strip, \* is all, else only matches */ struct _hints *html_hints; /* html hints */ VALUE attr_key_mod; VALUE element_key_mod; #if HAS_ENCODING_SUPPORT rb_encoding *rb_enc; #elif HAS_PRIVATE_ENCODING VALUE rb_enc; #else void *rb_enc; #endif } *Options; /* parse information structure */ struct _pInfo { struct _helperStack helpers; struct _err err; char *str; //buffer being read from char *end; // end of original string char *s; // current position in buffer VALUE obj; ParseCallbacks pcb; CircArray circ_array; unsigned long id; //set for text types when cirs_array is set Options options; char last; // last character read, rarely set }; extern VALUE ox_parse(char *xml, size_t len, ParseCallbacks pcb, char **endp, Options options, Err err); extern void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line); extern void ox_sax_define(void); extern char* 
ox_write_obj_to_str(VALUE obj, Options copts); extern void ox_write_obj_to_file(VALUE obj, const char *path, Options copts); extern struct _options ox_default_options; extern VALUE Ox; extern ID ox_abort_id; extern ID ox_at_column_id; extern ID ox_at_content_id; extern ID ox_at_id; extern ID ox_at_line_id; extern ID ox_at_pos_id; extern ID ox_at_value_id; extern ID ox_attr_id; extern ID ox_attr_value_id; extern ID ox_attrs_done_id; extern ID ox_attributes_id; extern ID ox_beg_id; extern ID ox_bigdecimal_id; extern ID ox_call_id; extern ID ox_cdata_id; extern ID ox_comment_id; extern ID ox_den_id; extern ID ox_doctype_id; extern ID ox_end_element_id; extern ID ox_end_id; extern ID ox_end_instruct_id; extern ID ox_error_id; extern ID ox_excl_id; extern ID ox_external_encoding_id; extern ID ox_fileno_id; extern ID ox_force_encoding_id; extern ID ox_inspect_id; extern ID ox_instruct_id; extern ID ox_jd_id; extern ID ox_keys_id; extern ID ox_local_id; extern ID ox_mesg_id; extern ID ox_message_id; extern ID ox_new_id; extern ID ox_nodes_id; extern ID ox_num_id; extern ID ox_parse_id; extern ID ox_pos_id; extern ID ox_read_id; extern ID ox_readpartial_id; extern ID ox_start_element_id; extern ID ox_string_id; extern ID ox_text_id; extern ID ox_to_c_id; extern ID ox_to_s_id; extern ID ox_to_sym_id; extern ID ox_tv_sec_id; extern ID ox_tv_nsec_id; extern ID ox_tv_usec_id; extern ID ox_value_id; #if HAS_ENCODING_SUPPORT extern rb_encoding *ox_utf8_encoding; #elif HAS_PRIVATE_ENCODING extern VALUE ox_utf8_encoding; #else extern void *ox_utf8_encoding; #endif extern VALUE ox_empty_string; extern VALUE ox_encoding_sym; extern VALUE ox_indent_sym; extern VALUE ox_size_sym; extern VALUE ox_standalone_sym; extern VALUE ox_sym_bank; // Array extern VALUE ox_version_sym; extern VALUE ox_zero_fixnum; extern VALUE ox_bigdecimal_class; extern VALUE ox_date_class; extern VALUE ox_stringio_class; extern VALUE ox_struct_class; extern VALUE ox_time_class; extern VALUE ox_document_clas; 
extern VALUE ox_element_clas; extern VALUE ox_instruct_clas; extern VALUE ox_bag_clas; extern VALUE ox_comment_clas; extern VALUE ox_raw_clas; extern VALUE ox_doctype_clas; extern VALUE ox_cdata_clas; extern Cache ox_symbol_cache; extern Cache ox_class_cache; extern Cache ox_attr_cache; extern void ox_init_builder(VALUE ox); #if defined(__cplusplus) #if 0 { /* satisfy cc-mode */ #endif } /* extern "C" { */ #endif #endif /* OX_H */ ox-2.11.0/ext/ox/builder.c0000644000004100000410000005556613502763477015330 0ustar www-datawww-data/* builder.c * Copyright (c) 2011, 2016 Peter Ohler * All rights reserved. */ #include #include #include #include #include "ox.h" #include "buf.h" #include "err.h" #define MAX_DEPTH 128 typedef struct _element { char *name; char buf[64]; int len; bool has_child; bool non_text_child; } *Element; typedef struct _builder { struct _buf buf; int indent; char encoding[64]; int depth; FILE *file; struct _element stack[MAX_DEPTH]; long line; long col; long pos; } *Builder; static VALUE builder_class = Qundef; static const char indent_spaces[] = "\n "; // 128 spaces // The : character is equivalent to 10. Used for replacement characters up to // 10 characters long such as '􏿿'. From // https://www.w3.org/TR/2006/REC-xml11-20060816 #if 0 static const char xml_friendly_chars[257] = "\ :::::::::11::1::::::::::::::::::\ 11611156111111111111111111114141\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111"; #endif // From 2.3 of the XML 1.1 spec. All over 0x20 except <&", > also. Builder // uses double quotes for attributes. 
static const char xml_attr_chars[257] = "\ :::::::::11::1::::::::::::::::::\ 11611151111111111111111111114141\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111"; // From 3.1 of the XML 1.1 spec. All over 0x20 except <&, > also. static const char xml_element_chars[257] = "\ :::::::::11::1::::::::::::::::::\ 11111151111111111111111111114141\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111"; inline static size_t xml_str_len(const unsigned char *str, size_t len, const char *table) { size_t size = 0; for (; 0 < len; str++, len--) { size += table[*str]; } return size - len * (size_t)'0'; } static void append_indent(Builder b) { if (0 >= b->indent) { return; } if (b->buf.head < b->buf.tail) { int cnt = (b->indent * (b->depth + 1)) + 1; if (sizeof(indent_spaces) <= (size_t)cnt) { cnt = sizeof(indent_spaces) - 1; } buf_append_string(&b->buf, indent_spaces, cnt); b->line++; b->col = cnt - 1; b->pos += cnt; } } static void append_string(Builder b, const char *str, size_t size, const char *table, bool strip_invalid_chars) { size_t xsize = xml_str_len((const unsigned char*)str, size, table); if (size == xsize) { const char *s = str; const char *end = str + size; buf_append_string(&b->buf, str, size); b->col += size; s = strchr(s, '\n'); while (NULL != s) { b->line++; b->col = end - s; s = strchr(s + 1, '\n'); } b->pos += size; } else { char buf[256]; char *end = buf + sizeof(buf) - 1; char *bp = buf; int i = size; int fcnt; for (; '\0' != *str && 0 < i; i--, str++) { if ('1' == (fcnt = table[(unsigned char)*str])) { if (end <= bp) { buf_append_string(&b->buf, buf, bp - buf); bp = buf; } if ('\n' == *str) { b->line++; b->col = 1; } else { b->col++; } b->pos++; *bp++ = *str; } 
else { b->pos += fcnt - '0'; b->col += fcnt - '0'; if (buf < bp) { buf_append_string(&b->buf, buf, bp - buf); bp = buf; } switch (*str) { case '"': buf_append_string(&b->buf, """, 6); break; case '&': buf_append_string(&b->buf, "&", 5); break; case '\'': buf_append_string(&b->buf, "'", 6); break; case '<': buf_append_string(&b->buf, "<", 4); break; case '>': buf_append_string(&b->buf, ">", 4); break; default: // Must be one of the invalid characters. if (!strip_invalid_chars) { rb_raise(ox_syntax_error_class, "'\\#x%02x' is not a valid XML character.", *str); } break; } } } if (buf < bp) { buf_append_string(&b->buf, buf, bp - buf); bp = buf; } } } static void append_sym_str(Builder b, VALUE v) { const char *s; int len; switch (rb_type(v)) { case T_STRING: s = StringValuePtr(v); len = RSTRING_LEN(v); break; case T_SYMBOL: s = rb_id2name(SYM2ID(v)); len = strlen(s); break; default: rb_raise(ox_arg_error_class, "expected a Symbol or String"); break; } append_string(b, s, len, xml_element_chars, false); } static void i_am_a_child(Builder b, bool is_text) { if (0 <= b->depth) { Element e = &b->stack[b->depth]; if (!e->has_child) { e->has_child = true; buf_append(&b->buf, '>'); b->col++; b->pos++; } if (!is_text) { e->non_text_child = true; } } } static int append_attr(VALUE key, VALUE value, Builder b) { buf_append(&b->buf, ' '); b->col++; b->pos++; append_sym_str(b, key); buf_append_string(&b->buf, "=\"", 2); b->col += 2; b->pos += 2; Check_Type(value, T_STRING); append_string(b, StringValuePtr(value), (int)RSTRING_LEN(value), xml_attr_chars, false); buf_append(&b->buf, '"'); b->col++; b->pos++; return ST_CONTINUE; } static void init(Builder b, int fd, int indent, long initial_size) { buf_init(&b->buf, fd, initial_size); b->indent = indent; *b->encoding = '\0'; b->depth = -1; b->line = 1; b->col = 1; b->pos = 0; } static void builder_free(void *ptr) { Builder b; Element e; int d; if (0 == ptr) { return; } b = (Builder)ptr; buf_cleanup(&b->buf); for (e = b->stack, d = 
b->depth; 0 < d; d--, e++) { if (e->name != e->buf) { free(e->name); } } xfree(ptr); } static void pop(Builder b) { Element e; if (0 > b->depth) { rb_raise(ox_arg_error_class, "closed too many elements"); } e = &b->stack[b->depth]; b->depth--; if (e->has_child) { if (e->non_text_child) { append_indent(b); } buf_append_string(&b->buf, "buf, e->name, e->len); buf_append(&b->buf, '>'); b->col += e->len + 3; b->pos += e->len + 3; if (e->buf != e->name) { free(e->name); e->name = 0; } } else { buf_append_string(&b->buf, "/>", 2); b->col += 2; b->pos += 2; } } static void bclose(Builder b) { while (0 <= b->depth) { pop(b); } if (0 <= b->indent) { buf_append(&b->buf, '\n'); } b->line++; b->col = 1; b->pos++; buf_finish(&b->buf); if (NULL != b->file) { fclose(b->file); } } static VALUE to_s(Builder b) { volatile VALUE rstr; if (0 != b->buf.fd) { rb_raise(ox_arg_error_class, "can not create a String with a stream or file builder."); } if (0 <= b->indent && '\n' != *(b->buf.tail - 1)) { buf_append(&b->buf, '\n'); b->line++; b->col = 1; b->pos++; } *b->buf.tail = '\0'; // for debugging rstr = rb_str_new(b->buf.head, buf_len(&b->buf)); if ('\0' != *b->encoding) { #if HAS_ENCODING_SUPPORT rb_enc_associate(rstr, rb_enc_find(b->encoding)); #endif } return rstr; } /* call-seq: new(options) * * Creates a new Builder that will write to a string that can be retrieved with * the to_s() method. If a block is given it is executed with a single parameter * which is the builder instance. The return value is then the generated string. 
* * - +options+ - (Hash) formating options * - +:indent+ (Fixnum) indentaion level, negative values excludes terminating newline * - +:size+ (Fixnum) the initial size of the string buffer */ static VALUE builder_new(int argc, VALUE *argv, VALUE self) { Builder b = ALLOC(struct _builder); int indent = ox_default_options.indent; long buf_size = 0; if (1 == argc) { volatile VALUE v; rb_check_type(*argv, T_HASH); if (Qnil != (v = rb_hash_lookup(*argv, ox_indent_sym))) { #ifdef RUBY_INTEGER_UNIFICATION if (rb_cInteger != rb_obj_class(v)) { #else if (rb_cFixnum != rb_obj_class(v)) { #endif rb_raise(ox_parse_error_class, ":indent must be a fixnum.\n"); } indent = NUM2INT(v); } if (Qnil != (v = rb_hash_lookup(*argv, ox_size_sym))) { #ifdef RUBY_INTEGER_UNIFICATION if (rb_cInteger != rb_obj_class(v)) { #else if (rb_cFixnum != rb_obj_class(v)) { #endif rb_raise(ox_parse_error_class, ":size must be a fixnum.\n"); } buf_size = NUM2LONG(v); } } b->file = NULL; init(b, 0, indent, buf_size); if (rb_block_given_p()) { volatile VALUE rb = Data_Wrap_Struct(builder_class, NULL, builder_free, b); rb_yield(rb); bclose(b); return to_s(b); } else { return Data_Wrap_Struct(builder_class, NULL, builder_free, b); } } /* call-seq: file(filename, options) * * Creates a new Builder that will write to a file. 
* * - +filename+ (String) filename to write to * - +options+ - (Hash) formating options * - +:indent+ (Fixnum) indentaion level, negative values excludes terminating newline * - +:size+ (Fixnum) the initial size of the string buffer */ static VALUE builder_file(int argc, VALUE *argv, VALUE self) { Builder b = ALLOC(struct _builder); int indent = ox_default_options.indent; long buf_size = 0; FILE *f; if (1 > argc) { rb_raise(ox_arg_error_class, "missing filename"); } Check_Type(*argv, T_STRING); if (NULL == (f = fopen(StringValuePtr(*argv), "w"))) { xfree(b); rb_raise(rb_eIOError, "%s\n", strerror(errno)); } if (2 == argc) { volatile VALUE v; rb_check_type(argv[1], T_HASH); if (Qnil != (v = rb_hash_lookup(argv[1], ox_indent_sym))) { #ifdef RUBY_INTEGER_UNIFICATION if (rb_cInteger != rb_obj_class(v)) { #else if (rb_cFixnum != rb_obj_class(v)) { #endif rb_raise(ox_parse_error_class, ":indent must be a fixnum.\n"); } indent = NUM2INT(v); } if (Qnil != (v = rb_hash_lookup(argv[1], ox_size_sym))) { #ifdef RUBY_INTEGER_UNIFICATION if (rb_cInteger != rb_obj_class(v)) { #else if (rb_cFixnum != rb_obj_class(v)) { #endif rb_raise(ox_parse_error_class, ":size must be a fixnum.\n"); } buf_size = NUM2LONG(v); } } b->file = f; init(b, fileno(f), indent, buf_size); if (rb_block_given_p()) { volatile VALUE rb = Data_Wrap_Struct(builder_class, NULL, builder_free, b); rb_yield(rb); bclose(b); return Qnil; } else { return Data_Wrap_Struct(builder_class, NULL, builder_free, b); } } /* call-seq: io(io, options) * * Creates a new Builder that will write to an IO instance. 
* * - +io+ (String) IO to write to * - +options+ - (Hash) formating options * - +:indent+ (Fixnum) indentaion level, negative values excludes terminating newline * - +:size+ (Fixnum) the initial size of the string buffer */ static VALUE builder_io(int argc, VALUE *argv, VALUE self) { Builder b = ALLOC(struct _builder); int indent = ox_default_options.indent; long buf_size = 0; int fd; volatile VALUE v; if (1 > argc) { rb_raise(ox_arg_error_class, "missing IO object"); } if (!rb_respond_to(*argv, ox_fileno_id) || Qnil == (v = rb_funcall(*argv, ox_fileno_id, 0)) || 0 == (fd = FIX2INT(v))) { rb_raise(rb_eIOError, "expected an IO that has a fileno."); } if (2 == argc) { volatile VALUE v; rb_check_type(argv[1], T_HASH); if (Qnil != (v = rb_hash_lookup(argv[1], ox_indent_sym))) { #ifdef RUBY_INTEGER_UNIFICATION if (rb_cInteger != rb_obj_class(v)) { #else if (rb_cFixnum != rb_obj_class(v)) { #endif rb_raise(ox_parse_error_class, ":indent must be a fixnum.\n"); } indent = NUM2INT(v); } if (Qnil != (v = rb_hash_lookup(argv[1], ox_size_sym))) { #ifdef RUBY_INTEGER_UNIFICATION if (rb_cInteger != rb_obj_class(v)) { #else if (rb_cFixnum != rb_obj_class(v)) { #endif rb_raise(ox_parse_error_class, ":size must be a fixnum.\n"); } buf_size = NUM2LONG(v); } } b->file = NULL; init(b, fd, indent, buf_size); if (rb_block_given_p()) { volatile VALUE rb = Data_Wrap_Struct(builder_class, NULL, builder_free, b); rb_yield(rb); bclose(b); return Qnil; } else { return Data_Wrap_Struct(builder_class, NULL, builder_free, b); } } /* call-seq: instruct(decl,options) * * Adds the top level element. 
* * - +decl+ - (String) 'xml' expected * - +options+ - (Hash) version or encoding */ static VALUE builder_instruct(int argc, VALUE *argv, VALUE self) { Builder b = (Builder)DATA_PTR(self); i_am_a_child(b, false); append_indent(b); if (0 == argc) { buf_append_string(&b->buf, "", 7); b->col += 7; b->pos += 7; } else { volatile VALUE v; buf_append_string(&b->buf, "col += 2; b->pos += 2; append_sym_str(b, *argv); if (1 < argc && rb_cHash == rb_obj_class(argv[1])) { int len; if (Qnil != (v = rb_hash_lookup(argv[1], ox_version_sym))) { if (rb_cString != rb_obj_class(v)) { rb_raise(ox_parse_error_class, ":version must be a Symbol.\n"); } len = (int)RSTRING_LEN(v); buf_append_string(&b->buf, " version=\"", 10); buf_append_string(&b->buf, StringValuePtr(v), len); buf_append(&b->buf, '"'); b->col += len + 11; b->pos += len + 11; } if (Qnil != (v = rb_hash_lookup(argv[1], ox_encoding_sym))) { if (rb_cString != rb_obj_class(v)) { rb_raise(ox_parse_error_class, ":encoding must be a Symbol.\n"); } len = (int)RSTRING_LEN(v); buf_append_string(&b->buf, " encoding=\"", 11); buf_append_string(&b->buf, StringValuePtr(v), len); buf_append(&b->buf, '"'); b->col += len + 12; b->pos += len + 12; strncpy(b->encoding, StringValuePtr(v), sizeof(b->encoding)); b->encoding[sizeof(b->encoding) - 1] = '\0'; } if (Qnil != (v = rb_hash_lookup(argv[1], ox_standalone_sym))) { if (rb_cString != rb_obj_class(v)) { rb_raise(ox_parse_error_class, ":standalone must be a Symbol.\n"); } len = (int)RSTRING_LEN(v); buf_append_string(&b->buf, " standalone=\"", 13); buf_append_string(&b->buf, StringValuePtr(v), len); buf_append(&b->buf, '"'); b->col += len + 14; b->pos += len + 14; } } buf_append_string(&b->buf, "?>", 2); b->col += 2; b->pos += 2; } return Qnil; } /* call-seq: element(name,attributes) * * Adds an element with the name and attributes provided. If a block is given * then on closing of the block a pop() is called. 
* * - +name+ - (String) name of the element * - +attributes+ - (Hash) of the element */ static VALUE builder_element(int argc, VALUE *argv, VALUE self) { Builder b = (Builder)DATA_PTR(self); Element e; const char *name; int len; if (1 > argc) { rb_raise(ox_arg_error_class, "missing element name"); } i_am_a_child(b, false); append_indent(b); b->depth++; if (MAX_DEPTH <= b->depth) { rb_raise(ox_arg_error_class, "XML too deeply nested"); } switch (rb_type(*argv)) { case T_STRING: name = StringValuePtr(*argv); len = RSTRING_LEN(*argv); break; case T_SYMBOL: name = rb_id2name(SYM2ID(*argv)); len = strlen(name); break; default: rb_raise(ox_arg_error_class, "expected a Symbol or String for an element name"); break; } e = &b->stack[b->depth]; if (sizeof(e->buf) <= (size_t)len) { e->name = strdup(name); *e->buf = '\0'; } else { strcpy(e->buf, name); e->name = e->buf; } e->len = len; e->has_child = false; e->non_text_child = false; buf_append(&b->buf, '<'); b->col++; b->pos++; append_string(b, e->name, len, xml_element_chars, false); if (1 < argc && T_HASH == rb_type(argv[1])) { rb_hash_foreach(argv[1], append_attr, (VALUE)b); } // Do not close with > or /> yet. That is done with i_am_a_child() or pop(). if (rb_block_given_p()) { rb_yield(self); pop(b); } return Qnil; } /* call-seq: void_element(name,attributes) * * Adds an void element with the name and attributes provided. 
* * - +name+ - (String) name of the element * - +attributes+ - (Hash) of the element */ static VALUE builder_void_element(int argc, VALUE *argv, VALUE self) { Builder b = (Builder)DATA_PTR(self); const char *name; int len; if (1 > argc) { rb_raise(ox_arg_error_class, "missing element name"); } i_am_a_child(b, false); append_indent(b); switch (rb_type(*argv)) { case T_STRING: name = StringValuePtr(*argv); len = RSTRING_LEN(*argv); break; case T_SYMBOL: name = rb_id2name(SYM2ID(*argv)); len = strlen(name); break; default: rb_raise(ox_arg_error_class, "expected a Symbol or String for an element name"); break; } buf_append(&b->buf, '<'); b->col++; b->pos++; append_string(b, name, len, xml_element_chars, false); if (1 < argc && T_HASH == rb_type(argv[1])) { rb_hash_foreach(argv[1], append_attr, (VALUE)b); } buf_append_string(&b->buf, ">", 1); b->col++;; b->pos++; return Qnil; } /* call-seq: comment(text) * * Adds a comment element to the XML string being formed. * - +text+ - (String) contents of the comment */ static VALUE builder_comment(VALUE self, VALUE text) { Builder b = (Builder)DATA_PTR(self); rb_check_type(text, T_STRING); i_am_a_child(b, false); append_indent(b); buf_append_string(&b->buf, "", 3); b->col += 5; b->pos += 5; return Qnil; } /* call-seq: doctype(text) * * Adds a DOCTYPE element to the XML string being formed. * - +text+ - (String) contents of the doctype */ static VALUE builder_doctype(VALUE self, VALUE text) { Builder b = (Builder)DATA_PTR(self); rb_check_type(text, T_STRING); i_am_a_child(b, false); append_indent(b); buf_append_string(&b->buf, "col += 10; b->pos += 10; append_string(b, StringValuePtr(text), RSTRING_LEN(text), xml_element_chars, false); buf_append(&b->buf, '>'); b->col++; b->pos++; return Qnil; } /* call-seq: text(text) * * Adds a text element to the XML string being formed. 
* - +text+ - (String) contents of the text field * - +strip_invalid_chars+ - [true|false] strips any characters invalid for XML, defaults to false */ static VALUE builder_text(int argc, VALUE *argv, VALUE self) { Builder b = (Builder)DATA_PTR(self); volatile VALUE v; volatile VALUE strip_invalid_chars; if ((0 == argc) || (argc > 2)) { rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 1..2)", argc); } v = argv[0]; if (2 == argc) { strip_invalid_chars = argv[1]; } else { strip_invalid_chars = Qfalse; } if (T_STRING != rb_type(v)) { v = rb_funcall(v, ox_to_s_id, 0); } i_am_a_child(b, true); append_string(b, StringValuePtr(v), RSTRING_LEN(v), xml_element_chars, RTEST(strip_invalid_chars)); return Qnil; } /* call-seq: cdata(data) * * Adds a CDATA element to the XML string being formed. * - +data+ - (String) contents of the CDATA element */ static VALUE builder_cdata(VALUE self, VALUE data) { Builder b = (Builder)DATA_PTR(self); volatile VALUE v = data; const char *str; const char *s; const char *end; int len; if (T_STRING != rb_type(v)) { v = rb_funcall(v, ox_to_s_id, 0); } str = StringValuePtr(v); len = (int)RSTRING_LEN(v); s = str; end = str + len; i_am_a_child(b, false); append_indent(b); buf_append_string(&b->buf, "col += 9; b->pos += 9; buf_append_string(&b->buf, str, len); b->col += len; s = strchr(s, '\n'); while (NULL != s) { b->line++; b->col = end - s; s = strchr(s + 1, '\n'); } b->pos += len; buf_append_string(&b->buf, "]]>", 3); b->col += 3; b->pos += 3; return Qnil; } /* call-seq: raw(text) * * Adds the provided string directly to the XML without formatting or modifications. 
* * - +text+ - (String) contents to be added */ static VALUE builder_raw(VALUE self, VALUE text) { Builder b = (Builder)DATA_PTR(self); volatile VALUE v = text; const char *str; const char *s; const char *end; int len; if (T_STRING != rb_type(v)) { v = rb_funcall(v, ox_to_s_id, 0); } str = StringValuePtr(v); len = (int)RSTRING_LEN(v); s = str; end = str + len; i_am_a_child(b, true); buf_append_string(&b->buf, str, len); b->col += len; s = strchr(s, '\n'); while (NULL != s) { b->line++; b->col = end - s; s = strchr(s + 1, '\n'); } b->pos += len; return Qnil; } /* call-seq: to_s() * * Returns the JSON document string in what ever state the construction is at. */ static VALUE builder_to_s(VALUE self) { return to_s((Builder)DATA_PTR(self)); } /* call-seq: line() * * Returns the current line in the output. The first line is line 1. */ static VALUE builder_line(VALUE self) { return LONG2NUM(((Builder)DATA_PTR(self))->line); } /* call-seq: column() * * Returns the current column in the output. The first character in a line is at * column 1. */ static VALUE builder_column(VALUE self) { return LONG2NUM(((Builder)DATA_PTR(self))->col); } /* call-seq: pos() * * Returns the number of bytes written. */ static VALUE builder_pos(VALUE self) { return LONG2NUM(((Builder)DATA_PTR(self))->pos); } /* call-seq: pop() * * Closes the current element. */ static VALUE builder_pop(VALUE self) { pop((Builder)DATA_PTR(self)); return Qnil; } /* call-seq: close() * * Closes the all elements and the document. */ static VALUE builder_close(VALUE self) { bclose((Builder)DATA_PTR(self)); return Qnil; } /* * Document-class: Ox::Builder * * An XML builder. 
*/ void ox_init_builder(VALUE ox) { builder_class = rb_define_class_under(ox, "Builder", rb_cObject); rb_define_module_function(builder_class, "new", builder_new, -1); rb_define_module_function(builder_class, "file", builder_file, -1); rb_define_module_function(builder_class, "io", builder_io, -1); rb_define_method(builder_class, "instruct", builder_instruct, -1); rb_define_method(builder_class, "comment", builder_comment, 1); rb_define_method(builder_class, "doctype", builder_doctype, 1); rb_define_method(builder_class, "element", builder_element, -1); rb_define_method(builder_class, "void_element", builder_void_element, -1); rb_define_method(builder_class, "text", builder_text, -1); rb_define_method(builder_class, "cdata", builder_cdata, 1); rb_define_method(builder_class, "raw", builder_raw, 1); rb_define_method(builder_class, "pop", builder_pop, 0); rb_define_method(builder_class, "close", builder_close, 0); rb_define_method(builder_class, "to_s", builder_to_s, 0); rb_define_method(builder_class, "line", builder_line, 0); rb_define_method(builder_class, "column", builder_column, 0); rb_define_method(builder_class, "pos", builder_pos, 0); } ox-2.11.0/ext/ox/base64.c0000644000004100000410000000576513502763477014762 0ustar www-datawww-data/* base64.c * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #include #include #include "base64.h" static char digits[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /* invalid or terminating characters are set to 'X' or \x58 */ static uchar s_digits[256] = "\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x3E\x58\x58\x58\x3F\ \x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x58\x58\x58\x58\x58\x58\ \x58\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\ \x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x58\x58\x58\x58\x58\ \x58\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\ \x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58"; void to_base64(const uchar *src, int len, char *b64) { const uchar *end3; int len3 = len % 3; uchar b1, b2, b3; end3 = src + (len - len3); while (src < end3) { b1 = *src++; b2 = *src++; b3 = *src++; *b64++ = digits[(uchar)(b1 >> 2)]; *b64++ = digits[(uchar)(((b1 & 0x03) << 4) | (b2 >> 4))]; *b64++ = digits[(uchar)(((b2 & 0x0F) << 2) | (b3 >> 6))]; *b64++ = digits[(uchar)(b3 & 0x3F)]; } if (1 == len3) { b1 = *src++; *b64++ = digits[b1 >> 2]; *b64++ = digits[(b1 & 0x03) << 4]; *b64++ = '='; *b64++ = '='; } else if (2 == len3) { b1 = *src++; b2 = *src++; *b64++ = digits[b1 >> 2]; *b64++ = digits[((b1 & 0x03) << 4) | (b2 >> 4)]; *b64++ = digits[(b2 & 0x0F) << 2]; *b64++ = '='; } *b64 = '\0'; } unsigned long 
b64_orig_size(const char *text) { const char *start = text; unsigned long size = 0; if ('\0' != *text) { for (; 0 != *text; text++) { } size = (text - start) * 3 / 4; text--; if ('=' == *text) { size--; text--; if ('=' == *text) { size--; } } } return size; } void from_base64(const char *b64, uchar *str) { uchar b0, b1, b2, b3; while (1) { if ('X' == (b0 = s_digits[(uchar)*b64++])) { break; } if ('X' == (b1 = s_digits[(uchar)*b64++])) { break; } *str++ = (b0 << 2) | ((b1 >> 4) & 0x03); if ('X' == (b2 = s_digits[(uchar)*b64++])) { break; } *str++ = (b1 << 4) | ((b2 >> 2) & 0x0F); if ('X' == (b3 = s_digits[(uchar)*b64++])) { break; } *str++ = (b2 << 6) | b3; } *str = '\0'; } ox-2.11.0/ext/ox/dump.c0000644000004100000410000010110413502763477014623 0ustar www-datawww-data/* dump.c * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #include #include #include #include #include #include "base64.h" #include "cache8.h" #include "ox.h" #define USE_B64 0 #define MAX_DEPTH 1000 typedef unsigned long ulong; typedef struct _str { const char *str; size_t len; } *Str; typedef struct _element { struct _str clas; struct _str attr; unsigned long id; int indent; /* < 0 indicates no \n */ int closed; char type; } *Element; typedef struct _out { void (*w_start)(struct _out *out, Element e); void (*w_end)(struct _out *out, Element e); void (*w_time)(struct _out *out, VALUE obj); char *buf; char *end; char *cur; Cache8 circ_cache; unsigned long circ_cnt; int indent; int depth; /* used by dumpHash */ Options opts; VALUE obj; } *Out; static void dump_obj_to_xml(VALUE obj, Options copts, Out out); static void dump_first_obj(VALUE obj, Out out); static void dump_obj(ID aid, VALUE obj, int depth, Out out); static void dump_gen_doc(VALUE obj, int depth, Out out); static void dump_gen_element(VALUE obj, int depth, Out out); static void dump_gen_instruct(VALUE obj, int depth, Out out); static int dump_gen_attr(VALUE key, VALUE value, Out out); static int dump_gen_nodes(VALUE obj, int 
depth, Out out); static void dump_gen_val_node(VALUE obj, int depth, const char *pre, size_t plen, const char *suf, size_t slen, Out out); static void dump_start(Out out, Element e); static void dump_end(Out out, Element e); static void grow(Out out, size_t len); static void dump_value(Out out, const char *value, size_t size); static void dump_str_value(Out out, const char *value, size_t size, const char *table); static int dump_var(ID key, VALUE value, Out out); static void dump_num(Out out, VALUE obj); static void dump_date(Out out, VALUE obj); static void dump_time_thin(Out out, VALUE obj); static void dump_time_xsd(Out out, VALUE obj); static int dump_hash(VALUE key, VALUE value, Out out); static int is_xml_friendly(const uchar *str, int len, const char *table); static const char hex_chars[17] = "0123456789abcdef"; // The : character is equivalent to 10. Used for replacement characters up to 10 // characters long such as '􏿿'. static const char xml_friendly_chars[257] = "\ :::::::::11::1::::::::::::::::::\ 11611156111111111111111111114141\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111"; static const char xml_quote_chars[257] = "\ :::::::::11::1::::::::::::::::::\ 11611151111111111111111111114141\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111"; static const char xml_element_chars[257] = "\ :::::::::11::1::::::::::::::::::\ 11111151111111111111111111114141\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111"; inline static int is_xml_friendly(const uchar *str, int len, const char *table) { for (; 0 < len; 
str++, len--) { if ('1' != table[*str]) { return 0; } } return 1; } inline static size_t xml_str_len(const uchar *str, size_t len, const char *table) { size_t size = 0; for (; 0 < len; str++, len--) { size += xml_friendly_chars[*str]; } return size - len * (size_t)'0'; } inline static void dump_hex(uchar c, Out out) { uchar d = (c >> 4) & 0x0F; *out->cur++ = hex_chars[d]; d = c & 0x0F; *out->cur++ = hex_chars[d]; } static Type obj_class_code(VALUE obj) { VALUE clas = rb_obj_class(obj); switch (rb_type(obj)) { case T_NIL: return NilClassCode; case T_ARRAY: return ArrayCode; case T_HASH: return HashCode; case T_TRUE: return TrueClassCode; case T_FALSE: return FalseClassCode; case T_FIXNUM: return FixnumCode; case T_FLOAT: return FloatCode; case T_STRING: return (is_xml_friendly((uchar*)StringValuePtr(obj), (int)RSTRING_LEN(obj), xml_element_chars)) ? StringCode : String64Code; case T_SYMBOL: { const char *sym = rb_id2name(SYM2ID(obj)); return (is_xml_friendly((uchar*)sym, (int)strlen(sym), xml_element_chars)) ? SymbolCode : Symbol64Code; } case T_DATA: return (rb_cTime == clas) ? TimeCode : ((ox_date_class == clas) ? DateCode : 0); case T_STRUCT: return (rb_cRange == clas) ? RangeCode : StructCode; case T_OBJECT: return (ox_document_clas == clas || ox_element_clas == clas) ? 
RawCode : ObjectCode; case T_REGEXP: return RegexpCode; case T_BIGNUM: return BignumCode; #ifdef T_COMPLEX case T_COMPLEX: return ComplexCode; #endif #ifdef T_RATIONAL case T_RATIONAL: return RationalCode; #endif case T_CLASS: return ClassCode; default: return 0; } } inline static void fill_indent(Out out, int cnt) { if (0 <= cnt) { *out->cur++ = '\n'; if (0 < out->opts->margin_len) { memcpy(out->cur, out->opts->margin, out->opts->margin_len); out->cur += out->opts->margin_len; } for (; 0 < cnt; cnt--) { *out->cur++ = ' '; } } } inline static void fill_value(Out out, const char *value, size_t len) { if (6 < len) { memcpy(out->cur, value, len); out->cur += len; } else { for (; 0 < len; len--, value++) { *out->cur++ = *value; } } } inline static void fill_attr(Out out, char name, const char *value, size_t len) { *out->cur++ = ' '; *out->cur++ = name; *out->cur++ = '='; *out->cur++ = '"'; if (6 < len) { memcpy(out->cur, value, len); out->cur += len; } else { for (; 0 < len; len--, value++) { *out->cur++ = *value; } } *out->cur++ = '"'; } inline static const char* ulong2str(ulong num, char *end) { char *b; *end-- = '\0'; for (b = end; 0 < num || b == end; num /= 10, b--) { *b = (num % 10) + '0'; } b++; return b; } static int check_circular(Out out, VALUE obj, Element e) { slot_t *slot; slot_t id; int result; if (0 == (id = ox_cache8_get(out->circ_cache, obj, &slot))) { out->circ_cnt++; id = out->circ_cnt; *slot = id; e->id = id; result = 0; } else { e->type = RefCode; e->clas.len = 0; e->clas.str = 0; e->closed = 1; e->id = id; out->w_start(out, e); result = 1; } return result; } static void grow(Out out, size_t len) { size_t size = out->end - out->buf; long pos = out->cur - out->buf; size *= 2; if (size <= len * 2 + pos) { size += len; } REALLOC_N(out->buf, char, size + 10); /* 10 extra for terminator character plus extra (paranoid) */ out->end = out->buf + size; out->cur = out->buf + pos; } static void dump_start(Out out, Element e) { size_t size = e->indent + 4 + 
out->opts->margin_len; if (0 < e->attr.len) { /* a="attr" */ size += e->attr.len + 5; } if (0 < e->clas.len) { /* c="class" */ size += e->clas.len + 5; } if (0 < e->id) { /* i="id" */ size += 24; /* over estimate, 19 digits */ } if (out->end - out->cur <= (long)size) { grow(out, size); } if (out->buf + out->opts->margin_len < out->cur) { fill_indent(out, e->indent); } *out->cur++ = '<'; *out->cur++ = e->type; if (0 < e->attr.len) { fill_attr(out, 'a', e->attr.str, e->attr.len); } if ((ObjectCode == e->type || ExceptionCode == e->type || StructCode == e->type || ClassCode == e->type) && 0 < e->clas.len) { fill_attr(out, 'c', e->clas.str, e->clas.len); } if (0 < e->id) { char buf[32]; char *end = buf + sizeof(buf) - 1; const char *s = ulong2str(e->id, end); fill_attr(out, 'i', s, end - s); } if (e->closed) { *out->cur++ = '/'; } *out->cur++ = '>'; *out->cur = '\0'; } static void dump_end(Out out, Element e) { size_t size = e->indent + 5 + out->opts->margin_len; if (out->end - out->cur <= (long)size) { grow(out, size); } fill_indent(out, e->indent); *out->cur++ = '<'; *out->cur++ = '/'; *out->cur++ = e->type; *out->cur++ = '>'; *out->cur = '\0'; } inline static void dump_value(Out out, const char *value, size_t size) { if (out->end - out->cur <= (long)size) { grow(out, size); } if (6 < size) { memcpy(out->cur, value, size); out->cur += size; } else { for (; 0 < size; size--, value++) { *out->cur++ = *value; } } *out->cur = '\0'; } inline static void dump_str_value(Out out, const char *value, size_t size, const char *table) { size_t xsize = xml_str_len((const uchar*)value, size, table); if (out->end - out->cur <= (long)xsize) { grow(out, xsize); } for (; 0 < size; size--, value++) { if ('1' == table[(uchar)*value]) { *out->cur++ = *value; } else { switch (*value) { case '"': *out->cur++ = '&'; *out->cur++ = 'q'; *out->cur++ = 'u'; *out->cur++ = 'o'; *out->cur++ = 't'; *out->cur++ = ';'; break; case '&': *out->cur++ = '&'; *out->cur++ = 'a'; *out->cur++ = 'm'; 
*out->cur++ = 'p'; *out->cur++ = ';'; break; case '\'': *out->cur++ = '&'; *out->cur++ = 'a'; *out->cur++ = 'p'; *out->cur++ = 'o'; *out->cur++ = 's'; *out->cur++ = ';'; break; case '<': *out->cur++ = '&'; *out->cur++ = 'l'; *out->cur++ = 't'; *out->cur++ = ';'; break; case '>': *out->cur++ = '&'; *out->cur++ = 'g'; *out->cur++ = 't'; *out->cur++ = ';'; break; default: // Must be one of the invalid characters. if (StrictEffort == out->opts->effort) { rb_raise(ox_syntax_error_class, "'\\#x%02x' is not a valid XML character.", *value); } if (Yes == out->opts->allow_invalid) { *out->cur++ = '&'; *out->cur++ = '#'; *out->cur++ = 'x'; *out->cur++ = '0'; *out->cur++ = '0'; dump_hex(*value, out); *out->cur++ = ';'; } else if ('\0' != *out->opts->inv_repl) { // If the empty string then ignore. The first character of // the replacement is the length. memcpy(out->cur, out->opts->inv_repl + 1, (size_t)*out->opts->inv_repl); out->cur += *out->opts->inv_repl; } break; } } } *out->cur = '\0'; } inline static void dump_num(Out out, VALUE obj) { char buf[32]; char *b = buf + sizeof(buf) - 1; long num = NUM2LONG(obj); int neg = 0; if (0 > num) { neg = 1; num = -num; } *b-- = '\0'; if (0 < num) { for (; 0 < num; num /= 10, b--) { *b = (num % 10) + '0'; } if (neg) { *b = '-'; } else { b++; } } else { *b = '0'; } if (out->end - out->cur <= (long)(sizeof(buf) - (b - buf))) { grow(out, sizeof(buf) - (b - buf)); } for (; '\0' != *b; b++) { *out->cur++ = *b; } *out->cur = '\0'; } static void dump_time_thin(Out out, VALUE obj) { char buf[64]; char *b = buf + sizeof(buf) - 1; #if HAS_RB_TIME_TIMESPEC struct timespec ts = rb_time_timespec(obj); time_t sec = ts.tv_sec; long nsec = ts.tv_nsec; #else time_t sec = NUM2LONG(rb_funcall2(obj, ox_tv_sec_id, 0, 0)); #if HAS_NANO_TIME long nsec = NUM2LONG(rb_funcall2(obj, ox_tv_nsec_id, 0, 0)); #else long nsec = NUM2LONG(rb_funcall2(obj, ox_tv_usec_id, 0, 0)) * 1000; #endif #endif char *dot = b - 10; long size; *b-- = '\0'; for (; dot < b; b--, nsec 
/= 10) { *b = '0' + (nsec % 10); } *b-- = '.'; for (; 0 < sec; b--, sec /= 10) { *b = '0' + (sec % 10); } b++; size = sizeof(buf) - (b - buf) - 1; if (out->end - out->cur <= size) { grow(out, size); } memcpy(out->cur, b, size); out->cur += size; } static void dump_date(Out out, VALUE obj) { char buf[64]; char *b = buf + sizeof(buf) - 1; long jd = NUM2LONG(rb_funcall2(obj, ox_jd_id, 0, 0)); long size; *b-- = '\0'; for (; 0 < jd; b--, jd /= 10) { *b = '0' + (jd % 10); } b++; if ('\0' == *b) { b--; *b = '0'; } size = sizeof(buf) - (b - buf) - 1; if (out->end - out->cur <= size) { grow(out, size); } memcpy(out->cur, b, size); out->cur += size; } static void dump_time_xsd(Out out, VALUE obj) { struct tm *tm; #if HAS_RB_TIME_TIMESPEC struct timespec ts = rb_time_timespec(obj); time_t sec = ts.tv_sec; long nsec = ts.tv_nsec; #else time_t sec = NUM2LONG(rb_funcall2(obj, ox_tv_sec_id, 0, 0)); #if HAS_NANO_TIME long nsec = NUM2LONG(rb_funcall2(obj, ox_tv_nsec_id, 0, 0)); #else long nsec = NUM2LONG(rb_funcall2(obj, ox_tv_usec_id, 0, 0)) * 1000; #endif #endif int tzhour, tzmin; char tzsign = '+'; if (out->end - out->cur <= 33) { grow(out, 33); } /* 2010-07-09T10:47:45.895826+09:00 */ tm = localtime(&sec); #if HAS_TM_GMTOFF if (0 > tm->tm_gmtoff) { tzsign = '-'; tzhour = (int)(tm->tm_gmtoff / -3600); tzmin = (int)(tm->tm_gmtoff / -60) - (tzhour * 60); } else { tzhour = (int)(tm->tm_gmtoff / 3600); tzmin = (int)(tm->tm_gmtoff / 60) - (tzhour * 60); } #else tzhour = 0; tzmin = 0; #endif /* TBD replace with more efficient printer */ out->cur += sprintf(out->cur, "%04d-%02d-%02dT%02d:%02d:%02d.%06ld%c%02d:%02d", tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, nsec / 1000, tzsign, tzhour, tzmin); } static void dump_first_obj(VALUE obj, Out out) { char buf[128]; Options copts = out->opts; int cnt; if (Yes == copts->with_xml) { if (0 < copts->margin_len) { dump_value(out, copts->margin, copts->margin_len); } if ('\0' == *copts->encoding) { 
dump_value(out, "", 21); } else { cnt = snprintf(buf, sizeof(buf), "", copts->encoding); dump_value(out, buf, cnt); } } if (Yes == copts->with_instruct) { if (out->buf < out->cur) { dump_value(out, "\n", 1); } if (0 < copts->margin_len) { dump_value(out, copts->margin, copts->margin_len); } cnt = snprintf(buf, sizeof(buf), "", (Yes == copts->circular) ? " circular=\"yes\"" : ((No == copts->circular) ? " circular=\"no\"" : ""), (Yes == copts->xsd_date) ? " xsd_date=\"yes\"" : ((No == copts->xsd_date) ? " xsd_date=\"no\"" : "")); dump_value(out, buf, cnt); } if (Yes == copts->with_dtd) { if (0 < copts->margin_len) { dump_value(out, copts->margin, copts->margin_len); } cnt = snprintf(buf, sizeof(buf), "%s", (out->buf < out->cur) ? "\n" : "", obj_class_code(obj)); dump_value(out, buf, cnt); } if (0 < copts->margin_len) { dump_value(out, copts->margin, copts->margin_len); } dump_obj(0, obj, 0, out); } static void dump_obj(ID aid, VALUE obj, int depth, Out out) { struct _element e; VALUE prev_obj = out->obj; char value_buf[64]; int cnt; if (MAX_DEPTH < depth) { rb_raise(rb_eSysStackError, "maximum depth exceeded"); } out->obj = obj; if (0 == aid) { e.attr.str = 0; e.attr.len = 0; } else { e.attr.str = rb_id2name(aid); // Ruby 2.3 started to return NULL for some IDs so check for // NULL. Ignore if NULL aid. if (NULL == e.attr.str) { return; } e.attr.len = strlen(e.attr.str); } e.closed = 0; if (0 == depth) { e.indent = (0 <= out->indent) ? 
0 : -1; } else if (0 > out->indent) { e.indent = -1; } else if (0 == out->indent) { e.indent = 0; } else { e.indent = depth * out->indent; } e.id = 0; e.clas.len = 0; e.clas.str = 0; switch (rb_type(obj)) { case T_NIL: e.type = NilClassCode; e.closed = 1; out->w_start(out, &e); break; case T_ARRAY: if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } cnt = (int)RARRAY_LEN(obj); e.type = ArrayCode; e.closed = (0 >= cnt); out->w_start(out, &e); if (!e.closed) { const VALUE *np = RARRAY_PTR(obj); int i; int d2 = depth + 1; for (i = cnt; 0 < i; i--, np++) { dump_obj(0, *np, d2, out); } out->w_end(out, &e); } break; case T_HASH: if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } cnt = (int)RHASH_SIZE(obj); e.type = HashCode; e.closed = (0 >= cnt); out->w_start(out, &e); if (0 < cnt) { unsigned int od = out->depth; out->depth = depth + 1; rb_hash_foreach(obj, dump_hash, (VALUE)out); out->depth = od; out->w_end(out, &e); } break; case T_TRUE: e.type = TrueClassCode; e.closed = 1; out->w_start(out, &e); break; case T_FALSE: e.type = FalseClassCode; e.closed = 1; out->w_start(out, &e); break; case T_FIXNUM: e.type = FixnumCode; out->w_start(out, &e); dump_num(out, obj); e.indent = -1; out->w_end(out, &e); break; case T_FLOAT: e.type = FloatCode; cnt = snprintf(value_buf, sizeof(value_buf), "%0.16g", rb_num2dbl(obj)); out->w_start(out, &e); dump_value(out, value_buf, cnt); e.indent = -1; out->w_end(out, &e); break; case T_STRING: { const char *str; if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } str = StringValuePtr(obj); cnt = (int)RSTRING_LEN(obj); #if USE_B64 if (is_xml_friendly((uchar*)str, cnt)) { e.type = StringCode; out->w_start(out, &e); dump_str_value(out, str, cnt, '<'); e.indent = -1; out->w_end(out, &e); } else { ulong size = b64_size(cnt); char *b64 = ALLOCA_N(char, size + 1); e.type = String64Code; to_base64((uchar*)str, cnt, b64); out->w_start(out, &e); dump_value(out, b64, size); e.indent = -1; 
out->w_end(out, &e); } #else e.type = StringCode; out->w_start(out, &e); dump_str_value(out, str, cnt, xml_element_chars); e.indent = -1; out->w_end(out, &e); #endif break; } case T_SYMBOL: { const char *sym = rb_id2name(SYM2ID(obj)); cnt = (int)strlen(sym); #if USE_B64 if (is_xml_friendly((uchar*)sym, cnt)) { e.type = SymbolCode; out->w_start(out, &e); dump_str_value(out, sym, cnt, '<'); e.indent = -1; out->w_end(out, &e); } else { ulong size = b64_size(cnt); char *b64 = ALLOCA_N(char, size + 1); e.type = Symbol64Code; to_base64((uchar*)sym, cnt, b64); out->w_start(out, &e); dump_value(out, b64, size); e.indent = -1; out->w_end(out, &e); } #else e.type = SymbolCode; out->w_start(out, &e); dump_str_value(out, sym, cnt, xml_element_chars); e.indent = -1; out->w_end(out, &e); #endif break; } case T_DATA: { VALUE clas; clas = rb_obj_class(obj); if (rb_cTime == clas) { e.type = TimeCode; out->w_start(out, &e); out->w_time(out, obj); e.indent = -1; out->w_end(out, &e); } else { const char *classname = rb_class2name(clas); if (0 == strcmp("Date", classname)) { e.type = DateCode; out->w_start(out, &e); dump_date(out, obj); e.indent = -1; out->w_end(out, &e); } else if (0 == strcmp("BigDecimal", classname)) { volatile VALUE rs = rb_funcall(obj, ox_to_s_id, 0); e.type = BigDecimalCode; out->w_start(out, &e); dump_value(out, StringValuePtr(rs), RSTRING_LEN(rs)); e.indent = -1; out->w_end(out, &e); } else { if (StrictEffort == out->opts->effort) { rb_raise(rb_eNotImpError, "Failed to dump T_DATA %s\n", classname); } else { e.type = NilClassCode; e.closed = 1; out->w_start(out, &e); } } } break; } case T_STRUCT: { #if HAS_RSTRUCT VALUE clas; if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } clas = rb_obj_class(obj); if (rb_cRange == clas) { VALUE beg = RSTRUCT_GET(obj, 0); VALUE end = RSTRUCT_GET(obj, 1); VALUE excl = RSTRUCT_GET(obj, 2); int d2 = depth + 1; e.type = RangeCode; e.clas.len = 5; e.clas.str = "Range"; out->w_start(out, &e); dump_obj(ox_beg_id, 
beg, d2, out); dump_obj(ox_end_id, end, d2, out); dump_obj(ox_excl_id, excl, d2, out); out->w_end(out, &e); } else { char num_buf[16]; int d2 = depth + 1; #if UNIFY_FIXNUM_AND_BIGNUM long i; long cnt = NUM2LONG(rb_struct_size(obj)); #else // UNIFY_FIXNUM_AND_INTEGER int i; int cnt = (int)RSTRUCT_LEN(obj); #endif // UNIFY_FIXNUM_AND_INTEGER e.type = StructCode; e.clas.str = rb_class2name(clas); e.clas.len = strlen(e.clas.str); out->w_start(out, &e); for (i = 0; i < cnt; i++) { VALUE v = RSTRUCT_GET(obj, i); dump_obj(rb_intern(ulong2str(i, num_buf + sizeof(num_buf) - 1)), v, d2, out); } out->w_end(out, &e); } #else e.type = NilClassCode; e.closed = 1; out->w_start(out, &e); #endif break; } case T_OBJECT: { VALUE clas; if (0 != out->circ_cache && check_circular(out, obj, &e)) { break; } clas = rb_obj_class(obj); e.clas.str = rb_class2name(clas); e.clas.len = strlen(e.clas.str); if (ox_document_clas == clas) { e.type = RawCode; out->w_start(out, &e); dump_gen_doc(obj, depth + 1, out); out->w_end(out, &e); } else if (ox_element_clas == clas) { e.type = RawCode; out->w_start(out, &e); dump_gen_element(obj, depth + 1, out); out->w_end(out, &e); } else { /* Object */ #if HAS_IVAR_HELPERS e.type = (Qtrue == rb_obj_is_kind_of(obj, rb_eException)) ? ExceptionCode : ObjectCode; cnt = (int)rb_ivar_count(obj); e.closed = (0 >= cnt); out->w_start(out, &e); if (0 < cnt) { unsigned int od = out->depth; out->depth = depth + 1; rb_ivar_foreach(obj, dump_var, (VALUE)out); out->depth = od; out->w_end(out, &e); } #else volatile VALUE vars = rb_obj_instance_variables(obj); //volatile VALUE vars = rb_funcall2(obj, rb_intern("instance_variables"), 0, 0); e.type = (Qtrue == rb_obj_is_kind_of(obj, rb_eException)) ? 
ExceptionCode : ObjectCode; cnt = (int)RARRAY_LEN(vars); e.closed = (0 >= cnt); out->w_start(out, &e); if (0 < cnt) { const VALUE *np = RARRAY_PTR(vars); ID vid; unsigned int od = out->depth; int i; out->depth = depth + 1; for (i = cnt; 0 < i; i--, np++) { vid = rb_to_id(*np); dump_var(vid, rb_ivar_get(obj, vid), out); } out->depth = od; out->w_end(out, &e); } #endif } break; } case T_REGEXP: { volatile VALUE rs = rb_funcall2(obj, ox_inspect_id, 0, 0); const char *s = StringValuePtr(rs); cnt = (int)RSTRING_LEN(rs); e.type = RegexpCode; out->w_start(out, &e); #if USE_B64 if (is_xml_friendly((uchar*)s, cnt)) { /*dump_value(out, "/", 1); */ dump_str_value(out, s, cnt, '<'); } else { ulong size = b64_size(cnt); char *b64 = ALLOCA_N(char, size + 1); to_base64((uchar*)s, cnt, b64); dump_value(out, b64, size); } #else dump_str_value(out, s, cnt, xml_element_chars); #endif e.indent = -1; out->w_end(out, &e); break; } case T_BIGNUM: { volatile VALUE rs = rb_big2str(obj, 10); e.type = BignumCode; out->w_start(out, &e); dump_value(out, StringValuePtr(rs), RSTRING_LEN(rs)); e.indent = -1; out->w_end(out, &e); break; } #ifdef T_COMPLEX case T_COMPLEX: e.type = ComplexCode; out->w_start(out, &e); #ifdef RCOMPLEX dump_obj(0, RCOMPLEX(obj)->real, depth + 1, out); dump_obj(0, RCOMPLEX(obj)->imag, depth + 1, out); #else dump_obj(0, rb_funcall2(obj, rb_intern("real"), 0, 0), depth + 1, out); dump_obj(0, rb_funcall2(obj, rb_intern("imag"), 0, 0), depth + 1, out); #endif out->w_end(out, &e); break; #endif #ifdef T_RATIONAL case T_RATIONAL: e.type = RationalCode; out->w_start(out, &e); #ifdef RRATIONAL dump_obj(0, RRATIONAL(obj)->num, depth + 1, out); dump_obj(0, RRATIONAL(obj)->den, depth + 1, out); #else dump_obj(0, rb_funcall2(obj, rb_intern("numerator"), 0, 0), depth + 1, out); dump_obj(0, rb_funcall2(obj, rb_intern("denominator"), 0, 0), depth + 1, out); #endif out->w_end(out, &e); break; #endif case T_CLASS: { e.type = ClassCode; e.clas.str = rb_class2name(obj); e.clas.len = 
strlen(e.clas.str); e.closed = 1; out->w_start(out, &e); break; } default: if (StrictEffort == out->opts->effort) { rb_raise(rb_eNotImpError, "Failed to dump %s Object (%02x)\n", rb_obj_classname(obj), rb_type(obj)); } else { e.type = NilClassCode; e.closed = 1; out->w_start(out, &e); } break; } out->obj = prev_obj; } static int dump_var(ID key, VALUE value, Out out) { if (T_DATA == rb_type(value) && key == ox_mesg_id) { /* There is a secret recipe that keeps Exception mesg attributes as a * T_DATA until it is needed. The safe way around this hack is to call * the message() method and use the returned string as the * message. Not pretty but it solves the most common use of this * hack. If there are others they will have to be handled one at a * time. */ value = rb_funcall(out->obj, ox_message_id, 0); } dump_obj(key, value, out->depth, out); return ST_CONTINUE; } static int dump_hash(VALUE key, VALUE value, Out out) { dump_obj(0, key, out->depth, out); dump_obj(0, value, out->depth, out); return ST_CONTINUE; } static void dump_gen_doc(VALUE obj, int depth, Out out) { volatile VALUE attrs = rb_attr_get(obj, ox_attributes_id); volatile VALUE nodes = rb_attr_get(obj, ox_nodes_id); if ('\0' == *out->opts->encoding && Qnil != attrs) { volatile VALUE renc = rb_hash_lookup(attrs, ox_encoding_sym); if (Qnil != renc) { const char *enc = StringValuePtr(renc); strncpy(out->opts->encoding, enc, sizeof(out->opts->encoding) - 1); } } if (Yes == out->opts->with_xml) { if (0 < out->opts->margin_len) { dump_value(out, out->opts->margin, out->opts->margin_len); } dump_value(out, "", 2); } if (Yes == out->opts->with_instruct) { if (out->buf < out->cur) { dump_value(out, "\n", 1); } if (0 < out->opts->margin_len) { dump_value(out, out->opts->margin, out->opts->margin_len); } dump_value(out, "", 35); } if (Qnil != nodes) { dump_gen_nodes(nodes, depth, out); } } static void dump_gen_element(VALUE obj, int depth, Out out) { volatile VALUE rname = rb_attr_get(obj, ox_at_value_id); volatile 
VALUE attrs = rb_attr_get(obj, ox_attributes_id); volatile VALUE nodes = rb_attr_get(obj, ox_nodes_id); const char *name = StringValuePtr(rname); long nlen = RSTRING_LEN(rname); size_t size; int indent; if (0 > out->indent) { indent = -1; } else if (0 == out->indent) { indent = 0; } else { indent = depth * out->indent; } size = indent + 4 + nlen + out->opts->margin_len; if (out->end - out->cur <= (long)size) { grow(out, size); } if (0 == depth && 0 < out->opts->margin_len && 0 < out->indent) { memcpy(out->cur, out->opts->margin, out->opts->margin_len); out->cur += out->opts->margin_len; } fill_indent(out, indent); *out->cur++ = '<'; fill_value(out, name, nlen); if (Qnil != attrs) { rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out); } if (Qnil != nodes && 0 < RARRAY_LEN(nodes)) { int do_indent; *out->cur++ = '>'; do_indent = dump_gen_nodes(nodes, depth, out); if (out->end - out->cur <= (long)size) { grow(out, size); } if (do_indent) { fill_indent(out, indent); } *out->cur++ = '<'; *out->cur++ = '/'; fill_value(out, name, nlen); } else { *out->cur++ = '/'; } *out->cur++ = '>'; *out->cur = '\0'; } static void dump_gen_instruct(VALUE obj, int depth, Out out) { volatile VALUE rname = rb_attr_get(obj, ox_at_value_id); volatile VALUE attrs = rb_attr_get(obj, ox_attributes_id); volatile VALUE rcontent = rb_attr_get(obj, ox_at_content_id); const char *name = StringValuePtr(rname); const char *content = 0; long nlen = RSTRING_LEN(rname); long clen = 0; size_t size; if (T_STRING == rb_type(rcontent)) { content = StringValuePtr(rcontent); clen = RSTRING_LEN(rcontent); size = 4 + nlen + clen; } else { size = 4 + nlen; } if (out->end - out->cur <= (long)size) { grow(out, size); } *out->cur++ = '<'; *out->cur++ = '?'; fill_value(out, name, nlen); if (0 != content) { fill_value(out, content, clen); } else if (Qnil != attrs) { rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out); } *out->cur++ = '?'; *out->cur++ = '>'; *out->cur = '\0'; } static int dump_gen_nodes(VALUE obj, int 
/* rb_hash_foreach() callback that appends one attribute to the output in the
 * form ` key="value"`.
 *
 * The key may be a Symbol or a String; anything else is converted with
 * rb_String(). The value is converted with rb_String() and escaped using the
 * xml_quote_chars table. Always returns ST_CONTINUE.
 */
static int
dump_gen_attr(VALUE key, VALUE value, Out out) {
    const char	*name;
    size_t	name_len;
    size_t	needed;

#if HAS_PRIVATE_ENCODING
    /* There seems to be a bug in jruby for converting symbols to strings and
     * preserving the encoding. This is a work around. */
    name = rb_str_ptr(rb_String(key));
#else
    {
        int	ktype = rb_type(key);

        if (T_SYMBOL == ktype) {
            name = rb_id2name(SYM2ID(key));
        } else {
            if (T_STRING != ktype) {
                key = rb_String(key);
            }
            name = StringValuePtr(key);
        }
    }
#endif
    name_len = strlen(name);
    value = rb_String(value);
    /* space, '=', two quotes plus the raw (unescaped) lengths */
    needed = 4 + name_len + RSTRING_LEN(value);
    if (out->end - out->cur <= (long)needed) {
        grow(out, needed);
    }
    *out->cur = ' ';
    out->cur++;
    fill_value(out, name, name_len);
    out->cur[0] = '=';
    out->cur[1] = '"';
    out->cur += 2;
    /* dump_str_value() makes its own room for the escaped text */
    dump_str_value(out, StringValuePtr(value), RSTRING_LEN(value), xml_quote_chars);
    *out->cur = '"';
    out->cur++;

    return ST_CONTINUE;
}
dump_time_xsd : dump_time_thin; out->buf = ALLOC_N(char, 65336); out->end = out->buf + 65325; /* 10 less than end plus extra for possible errors */ out->cur = out->buf; out->circ_cache = 0; out->circ_cnt = 0; out->opts = copts; out->obj = obj; *out->cur = '\0'; if (Yes == copts->circular) { ox_cache8_new(&out->circ_cache); } out->indent = copts->indent; if (ox_document_clas == clas) { dump_gen_doc(obj, -1, out); } else if (ox_element_clas == clas) { dump_gen_element(obj, 0, out); } else { out->w_start = dump_start; out->w_end = dump_end; dump_first_obj(obj, out); } if (0 <= out->indent) { dump_value(out, "\n", 1); } if (Yes == copts->circular) { ox_cache8_delete(out->circ_cache); } } char* ox_write_obj_to_str(VALUE obj, Options copts) { struct _out out; dump_obj_to_xml(obj, copts, &out); return out.buf; } void ox_write_obj_to_file(VALUE obj, const char *path, Options copts) { struct _out out; size_t size; FILE *f; dump_obj_to_xml(obj, copts, &out); size = out.cur - out.buf; if (0 == (f = fopen(path, "w"))) { rb_raise(rb_eIOError, "%s\n", strerror(errno)); } if (size != fwrite(out.buf, 1, size, f)) { int err = ferror(f); rb_raise(rb_eIOError, "Write failed. [%d:%s]\n", err, strerror(err)); } xfree(out.buf); fclose(f); } ox-2.11.0/ext/ox/cache8.h0000644000004100000410000000072213502763477015022 0ustar www-datawww-data/* cache8.h * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #ifndef OX_CACHE8_H #define OX_CACHE8_H #include "ruby.h" #include "stdint.h" typedef struct _cache8 *Cache8; typedef uint64_t slot_t; typedef uint64_t sid_t; extern void ox_cache8_new(Cache8 *cache); extern void ox_cache8_delete(Cache8 cache); extern slot_t ox_cache8_get(Cache8 cache, sid_t key, slot_t **slot); //extern void ox_cache8_print(Cache8 cache); #endif /* OX_CACHE8_H */ ox-2.11.0/ext/ox/encode.h0000644000004100000410000000074213502763477015126 0ustar www-datawww-data/* encode.h * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #ifndef __OX_ENCODE_H__ #define __OX_ENCODE_H__ #include "ruby.h" #if HAS_ENCODING_SUPPORT #include "ruby/encoding.h" #endif static inline VALUE ox_encode(VALUE rstr) { #if HAS_ENCODING_SUPPORT rb_enc_associate(rstr, ox_utf8_encoding); #else if (Qnil != ox_utf8_encoding) { rstr = rb_funcall(ox_utf8_encoding, ox_iconv_id, 1, rstr); } #endif return rstr; } #endif /* __OX_ENCODE_H__ */ ox-2.11.0/ext/ox/base64.h0000644000004100000410000000062013502763477014750 0ustar www-datawww-data/* base64.h * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #ifndef BASE64_H #define BASE64_H typedef unsigned char uchar; #define b64_size(len) ((len + 2) / 3 * 4) extern unsigned long b64_orig_size(const char *text); extern void to_base64(const uchar *src, int len, char *b64); extern void from_base64(const char *b64, uchar *str); #endif /* BASE64_H */ ox-2.11.0/ext/ox/sax_as.c0000644000004100000410000001420213502763477015136 0ustar www-datawww-data/* sax_as.c * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #include #include #include #include #include #if NEEDS_UIO #include #endif #include #include #include "ruby.h" #include "ox.h" #include "sax.h" static VALUE parse_double_time(const char *text) { long v = 0; long v2 = 0; const char *dot = 0; char c; for (; '.' != *text; text++) { c = *text; if (c < '0' || '9' < c) { return Qnil; } v = 10 * v + (long)(c - '0'); } dot = text++; for (; '\0' != *text && text - dot <= 6; text++) { c = *text; if (c < '0' || '9' < c) { return Qnil; } v2 = 10 * v2 + (long)(c - '0'); } for (; text - dot <= 9; text++) { v2 *= 10; } #if HAS_NANO_TIME return rb_time_nano_new(v, v2); #else return rb_time_new(v, v2 / 1000); #endif } typedef struct _tp { int cnt; char end; char alt; } *Tp; static VALUE parse_xsd_time(const char *text) { long cargs[10]; long *cp = cargs; long v; int i; char c = '\0'; struct _tp tpa[10] = { { 4, '-', '-' }, { 2, '-', '-' }, { 2, 'T', ' ' }, { 2, ':', ':' }, { 2, ':', ':' }, { 2, '.', '.' 
}, { 9, '+', '-' }, { 2, ':', ':' }, { 2, '\0', '\0' }, { 0, '\0', '\0' } }; Tp tp = tpa; struct tm tm; memset(cargs, 0, sizeof(cargs)); for (; 0 != tp->cnt; tp++) { for (i = tp->cnt, v = 0; 0 < i ; text++, i--) { c = *text; if (c < '0' || '9' < c) { if ('\0' == c || tp->end == c || tp->alt == c) { break; } return Qnil; } v = 10 * v + (long)(c - '0'); } if ('\0' == c) { break; } c = *text++; if (tp->end != c && tp->alt != c) { return Qnil; } *cp++ = v; } tm.tm_year = (int)cargs[0] - 1900; tm.tm_mon = (int)cargs[1] - 1; tm.tm_mday = (int)cargs[2]; tm.tm_hour = (int)cargs[3]; tm.tm_min = (int)cargs[4]; tm.tm_sec = (int)cargs[5]; #if HAS_NANO_TIME return rb_time_nano_new(mktime(&tm), cargs[6]); #else return rb_time_new(mktime(&tm), cargs[6] / 1000); #endif } /* call-seq: as_s() * * *return* value as an String. */ static VALUE sax_value_as_s(VALUE self) { SaxDrive dr = DATA_PTR(self); VALUE rs; if ('\0' == *dr->buf.str) { return Qnil; } if (dr->options.convert_special) { ox_sax_collapse_special(dr, dr->buf.str, dr->buf.pos, dr->buf.line, dr->buf.col); } switch (dr->options.skip) { case CrSkip: buf_collapse_return(dr->buf.str); break; case SpcSkip: buf_collapse_white(dr->buf.str); break; default: break; } rs = rb_str_new2(dr->buf.str); #if HAS_ENCODING_SUPPORT if (0 != dr->encoding) { rb_enc_associate(rs, dr->encoding); } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding) { rb_funcall(rs, ox_force_encoding_id, 1, dr->encoding); } #endif return rs; } /* call-seq: as_sym() * * *return* value as an Symbol. */ static VALUE sax_value_as_sym(VALUE self) { SaxDrive dr = DATA_PTR(self); if ('\0' == *dr->buf.str) { return Qnil; } return str2sym(dr, dr->buf.str, 0); } /* call-seq: as_f() * * *return* value as an Float. */ static VALUE sax_value_as_f(VALUE self) { SaxDrive dr = DATA_PTR(self); if ('\0' == *dr->buf.str) { return Qnil; } return rb_float_new(strtod(dr->buf.str, 0)); } /* call-seq: as_i() * * *return* value as an Fixnum. 
/* call-seq: as_i()
 *
 * *return* value as an Integer, or nil if the value is empty. An optional
 * leading '+' or '-' is accepted; any other character that is not a decimal
 * digit raises an argument error.
 */
static VALUE
sax_value_as_i(VALUE self) {
    SaxDrive	drive = DATA_PTR(self);
    const char	*cursor = drive->buf.str;
    long	total = 0;
    int		negative = 0;

    if ('\0' == *cursor) {
        return Qnil;
    }
    switch (*cursor) {
    case '-':
        negative = 1;
        cursor++;
        break;
    case '+':
        cursor++;
        break;
    default:
        break;
    }
    while ('\0' != *cursor) {
        if (*cursor < '0' || '9' < *cursor) {
            rb_raise(ox_arg_error_class, "Not a valid Fixnum.\n");
        }
        total = total * 10 + (*cursor - '0');
        cursor++;
    }
    return LONG2NUM(negative ? -total : total);
}
/* Define the Value class under the Sax constant looked up in Ox and register
 * its conversion methods (as_s, as_sym, as_i, as_f, as_time, as_bool and
 * empty?), each taking no arguments.
 */
void
ox_sax_define() {
    /* Never compiled. NOTE(review): presumably kept so that RDoc's C parser
     * can see where the Sax class is defined - confirm before removing. */
#if 0
    ox = rb_define_module("Ox");
    sax_module = rb_define_class_under(ox, "Sax", rb_cObject);
#endif
    /* Sax is looked up, not created, so it must already be defined on Ox */
    VALUE	sax_module = rb_const_get_at(Ox, rb_intern("Sax"));

    ox_sax_value_class = rb_define_class_under(sax_module, "Value", rb_cObject);

    rb_define_method(ox_sax_value_class, "as_s", sax_value_as_s, 0);
    rb_define_method(ox_sax_value_class, "as_sym", sax_value_as_sym, 0);
    rb_define_method(ox_sax_value_class, "as_i", sax_value_as_i, 0);
    rb_define_method(ox_sax_value_class, "as_f", sax_value_as_f, 0);
    rb_define_method(ox_sax_value_class, "as_time", sax_value_as_time, 0);
    rb_define_method(ox_sax_value_class, "as_bool", sax_value_as_bool, 0);
    rb_define_method(ox_sax_value_class, "empty?", sax_value_empty, 0);
}
*/ #include #include #include #include #include #if NEEDS_UIO #include #endif #include #include #include "ruby.h" #include "ox.h" #include "sax.h" #define BUF_PAD 4 static VALUE rescue_cb(VALUE rdr, VALUE err); static VALUE io_cb(VALUE rdr); static VALUE partial_io_cb(VALUE rdr); static int read_from_io(Buf buf); static int read_from_fd(Buf buf); static int read_from_io_partial(Buf buf); static int read_from_str(Buf buf); void ox_sax_buf_init(Buf buf, VALUE io) { volatile VALUE io_class = rb_obj_class(io); VALUE rfd; if (rb_cString == io_class) { buf->read_func = read_from_str; buf->in.str = StringValuePtr(io); } else if (ox_stringio_class == io_class && 0 == FIX2INT(rb_funcall2(io, ox_pos_id, 0, 0))) { volatile VALUE s = rb_funcall2(io, ox_string_id, 0, 0); buf->read_func = read_from_str; buf->in.str = StringValuePtr(s); } else if (rb_cFile == io_class && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) { buf->read_func = read_from_fd; buf->in.fd = FIX2INT(rfd); } else if (rb_respond_to(io, ox_readpartial_id)) { buf->read_func = read_from_io_partial; buf->in.io = io; } else if (rb_respond_to(io, ox_read_id)) { buf->read_func = read_from_io; buf->in.io = io; } else { rb_raise(ox_arg_error_class, "sax_parser io argument must respond to readpartial() or read().\n"); } buf->head = buf->base; *buf->head = '\0'; buf->end = buf->head + sizeof(buf->base) - BUF_PAD; buf->tail = buf->head; buf->read_end = buf->head; buf->pro = 0; buf->str = 0; buf->pos = 0; buf->line = 1; buf->col = 0; buf->pro_pos = 1; buf->pro_line = 1; buf->pro_col = 0; buf->dr = 0; } int ox_sax_buf_read(Buf buf) { int err; size_t shift = 0; // if there is not much room to read into, shift or realloc a larger buffer. 
if (buf->head < buf->tail && 4096 > buf->end - buf->tail) { if (0 == buf->pro) { shift = buf->tail - buf->head; } else { shift = buf->pro - buf->head - 1; // leave one character so we cab backup one } if (0 >= shift) { /* no space left so allocate more */ char *old = buf->head; size_t size = buf->end - buf->head + BUF_PAD; if (buf->head == buf->base) { buf->head = ALLOC_N(char, size * 2); memcpy(buf->head, old, size); } else { REALLOC_N(buf->head, char, size * 2); } buf->end = buf->head + size * 2 - BUF_PAD; buf->tail = buf->head + (buf->tail - old); buf->read_end = buf->head + (buf->read_end - old); if (0 != buf->pro) { buf->pro = buf->head + (buf->pro - old); } if (0 != buf->str) { buf->str = buf->head + (buf->str - old); } } else { memmove(buf->head, buf->head + shift, buf->read_end - (buf->head + shift)); buf->tail -= shift; buf->read_end -= shift; if (0 != buf->pro) { buf->pro -= shift; } if (0 != buf->str) { buf->str -= shift; } } } err = buf->read_func(buf); *buf->read_end = '\0'; return err; } static VALUE rescue_cb(VALUE rbuf, VALUE err) { VALUE err_class = rb_obj_class(err); if (err_class != rb_eTypeError && err_class != rb_eEOFError) { Buf buf = (Buf)rbuf; //ox_sax_drive_cleanup(buf->dr); called after exiting protect rb_raise(err, "at line %ld, column %ld\n", (long)buf->line, (long)buf->col); } return Qfalse; } static VALUE partial_io_cb(VALUE rbuf) { Buf buf = (Buf)rbuf; VALUE args[1]; VALUE rstr; char *str; size_t cnt; args[0] = ULONG2NUM(buf->end - buf->tail); rstr = rb_funcall2(buf->in.io, ox_readpartial_id, 1, args); str = StringValuePtr(rstr); cnt = strlen(str); //printf("*** read partial %lu bytes, str: '%s'\n", cnt, str); strcpy(buf->tail, str); buf->read_end = buf->tail + cnt; return Qtrue; } static VALUE io_cb(VALUE rbuf) { Buf buf = (Buf)rbuf; VALUE args[1]; VALUE rstr; char *str; size_t cnt; args[0] = ULONG2NUM(buf->end - buf->tail); rstr = rb_funcall2(buf->in.io, ox_read_id, 1, args); str = StringValuePtr(rstr); cnt = strlen(str); 
//printf("*** read %lu bytes, str: '%s'\n", cnt, str); strcpy(buf->tail, str); buf->read_end = buf->tail + cnt; return Qtrue; } static int read_from_io_partial(Buf buf) { return (Qfalse == rb_rescue(partial_io_cb, (VALUE)buf, rescue_cb, (VALUE)buf)); } static int read_from_io(Buf buf) { return (Qfalse == rb_rescue(io_cb, (VALUE)buf, rescue_cb, (VALUE)buf)); } static int read_from_fd(Buf buf) { ssize_t cnt; size_t max = buf->end - buf->tail; cnt = read(buf->in.fd, buf->tail, max); if (cnt < 0) { ox_sax_drive_error(buf->dr, "failed to read from file"); return -1; } else if (0 != cnt) { buf->read_end = buf->tail + cnt; } return 0; } static char* ox_stpncpy(char *dest, const char *src, size_t n) { size_t cnt = strlen(src) + 1; if (n < cnt) { cnt = n; } strncpy(dest, src, cnt); return dest + cnt - 1; } static int read_from_str(Buf buf) { size_t max = buf->end - buf->tail - 1; char *s; long cnt; if ('\0' == *buf->in.str) { /* done */ return -1; } s = ox_stpncpy(buf->tail, buf->in.str, max); *s = '\0'; cnt = s - buf->tail; buf->in.str += cnt; buf->read_end = buf->tail + cnt; return 0; } ox-2.11.0/ext/ox/cache.h0000644000004100000410000000056713502763477014741 0ustar www-datawww-data/* cache.h * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #ifndef OX_CACHE_H #define OX_CACHE_H #include "ruby.h" typedef struct _cache *Cache; extern void ox_cache_new(Cache *cache); extern VALUE ox_cache_get(Cache cache, const char *key, VALUE **slot, const char **keyp); extern void ox_cache_print(Cache cache); #endif /* OX_CACHE_H */ ox-2.11.0/ext/ox/err.c0000644000004100000410000000153113502763477014451 0ustar www-datawww-data/* err.c * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #include #include "err.h" void ox_err_set(Err e, VALUE clas, const char *format, ...) 
{ va_list ap; va_start(ap, format); e->clas = clas; vsnprintf(e->msg, sizeof(e->msg) - 1, format, ap); va_end(ap); } #if __GNUC__ > 4 _Noreturn void #else void #endif ox_err_raise(Err e) { rb_raise(e->clas, "%s", e->msg); } void _ox_err_set_with_location(Err err, const char *msg, const char *xml, const char *current, const char* file, int line) { int xline = 1; int col = 1; for (; xml < current && '\n' != *current; current--) { col++; } for (; xml < current; current--) { if ('\n' == *current) { xline++; } } ox_err_set(err, ox_parse_error_class, "%s at line %d, column %d [%s:%d]\n", msg, xline, col, file, line); } ox-2.11.0/ext/ox/sax_hint.h0000644000004100000410000000156313502763477015510 0ustar www-datawww-data/* hint.h * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #ifndef OX_HINT_H #define OX_HINT_H #include typedef enum { ActiveOverlay = 0, InactiveOverlay = 'i', BlockOverlay = 'b', OffOverlay = 'o', AbortOverlay = 'a', NestOverlay = 'n', // nest flag is ignored } Overlay; typedef struct _hint { const char *name; char empty; // must be closed or close auto it, not error char nest; // nesting allowed char jump; // jump to end char overlay;// Overlay const char **parents; } *Hint; typedef struct _hints { const char *name; Hint hints; // array of hints int size; } *Hints; extern Hints ox_hints_html(void); extern Hint ox_hint_find(Hints hints, const char *name); extern Hints ox_hints_dup(Hints h); extern void ox_hints_destroy(Hints h); #endif /* OX_HINT_H */ ox-2.11.0/ext/ox/sax_hint.c0000644000004100000410000002237213502763477015504 0ustar www-datawww-data/* hint.c * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #include #include #include #include #include "sax_hint.h" static const char *audio_video_0[] = { "audio", "video", 0 }; static const char *colgroup_0[] = { "colgroup", 0 }; static const char *details_0[] = { "details", 0 }; static const char *dl_0[] = { "dl", 0 }; static const char *dt_th_0[] = { "dt", "th", 0 }; static const char *fieldset_0[] = { "fieldset", 0 }; static const char *figure_0[] = { "figure", 0 }; static const char *frameset_0[] = { "frameset", 0 }; static const char *head_0[] = { "head", 0 }; static const char *html_0[] = { "html", 0 }; static const char *map_0[] = { "map", 0 }; static const char *ol_ul_menu_0[] = { "ol", "ul", "menu", 0 }; static const char *optgroup_select_datalist_0[] = { "optgroup", "select", "datalist", 0 }; static const char *ruby_0[] = { "ruby", 0 }; static const char *table_0[] = { "table", 0 }; static const char *tr_0[] = { "tr", 0 }; static struct _hint html_hint_array[] = { { "!--", false, false, false, ActiveOverlay, NULL }, // comment { "a", false, false, false, ActiveOverlay, NULL }, { "abbr", false, false, false, ActiveOverlay, NULL }, { "acronym", false, false, false, ActiveOverlay, NULL }, { "address", false, false, false, ActiveOverlay, NULL }, { "applet", false, false, false, ActiveOverlay, NULL }, { "area", true, false, false, ActiveOverlay, map_0 }, { "article", false, false, false, ActiveOverlay, NULL }, { "aside", false, false, false, ActiveOverlay, NULL }, { "audio", false, false, false, ActiveOverlay, NULL }, { "b", false, false, false, ActiveOverlay, NULL }, { "base", true, false, false, ActiveOverlay, head_0 }, { "basefont", true, false, false, ActiveOverlay, head_0 }, { "bdi", false, false, false, ActiveOverlay, NULL }, { "bdo", false, true, false, ActiveOverlay, NULL }, { "big", false, false, false, ActiveOverlay, NULL }, { "blockquote", false, false, false, ActiveOverlay, NULL }, { "body", false, false, false, ActiveOverlay, html_0 }, { "br", true, false, false, ActiveOverlay, NULL }, { "button", 
false, false, false, ActiveOverlay, NULL }, { "canvas", false, false, false, ActiveOverlay, NULL }, { "caption", false, false, false, ActiveOverlay, table_0 }, { "center", false, false, false, ActiveOverlay, NULL }, { "cite", false, false, false, ActiveOverlay, NULL }, { "code", false, false, false, ActiveOverlay, NULL }, { "col", true, false, false, ActiveOverlay, colgroup_0 }, { "colgroup", false, false, false, ActiveOverlay, NULL }, { "command", true, false, false, ActiveOverlay, NULL }, { "datalist", false, false, false, ActiveOverlay, NULL }, { "dd", false, false, false, ActiveOverlay, dl_0 }, { "del", false, false, false, ActiveOverlay, NULL }, { "details", false, false, false, ActiveOverlay, NULL }, { "dfn", false, false, false, ActiveOverlay, NULL }, { "dialog", false, false, false, ActiveOverlay, dt_th_0 }, { "dir", false, false, false, ActiveOverlay, NULL }, { "div", false, true, false, ActiveOverlay, NULL }, { "dl", false, false, false, ActiveOverlay, NULL }, { "dt", false, true, false, ActiveOverlay, dl_0 }, { "em", false, false, false, ActiveOverlay, NULL }, { "embed", true, false, false, ActiveOverlay, NULL }, { "fieldset", false, false, false, ActiveOverlay, NULL }, { "figcaption", false, false, false, ActiveOverlay, figure_0 }, { "figure", false, false, false, ActiveOverlay, NULL }, { "font", false, true, false, ActiveOverlay, NULL }, { "footer", false, false, false, ActiveOverlay, NULL }, { "form", false, false, false, ActiveOverlay, NULL }, { "frame", true, false, false, ActiveOverlay, frameset_0 }, { "frameset", false, false, false, ActiveOverlay, NULL }, { "h1", false, false, false, ActiveOverlay, NULL }, { "h2", false, false, false, ActiveOverlay, NULL }, { "h3", false, false, false, ActiveOverlay, NULL }, { "h4", false, false, false, ActiveOverlay, NULL }, { "h5", false, false, false, ActiveOverlay, NULL }, { "h6", false, false, false, ActiveOverlay, NULL }, { "head", false, false, false, ActiveOverlay, html_0 }, { "header", false, false, 
false, ActiveOverlay, NULL }, { "hgroup", false, false, false, ActiveOverlay, NULL }, { "hr", true, false, false, ActiveOverlay, NULL }, { "html", false, false, false, ActiveOverlay, NULL }, { "i", false, false, false, ActiveOverlay, NULL }, { "iframe", true, false, false, ActiveOverlay, NULL }, { "img", true, false, false, ActiveOverlay, NULL }, { "input", true, false, false, ActiveOverlay, NULL }, // somewhere under a form_0 { "ins", false, false, false, ActiveOverlay, NULL }, { "kbd", false, false, false, ActiveOverlay, NULL }, { "keygen", true, false, false, ActiveOverlay, NULL }, { "label", false, false, false, ActiveOverlay, NULL }, // somewhere under a form_0 { "legend", false, false, false, ActiveOverlay, fieldset_0 }, { "li", false, false, false, ActiveOverlay, ol_ul_menu_0 }, { "link", true, false, false, ActiveOverlay, head_0 }, { "map", false, false, false, ActiveOverlay, NULL }, { "mark", false, false, false, ActiveOverlay, NULL }, { "menu", false, false, false, ActiveOverlay, NULL }, { "meta", true, false, false, ActiveOverlay, head_0 }, { "meter", false, false, false, ActiveOverlay, NULL }, { "nav", false, false, false, ActiveOverlay, NULL }, { "noframes", false, false, false, ActiveOverlay, NULL }, { "noscript", false, false, false, ActiveOverlay, NULL }, { "object", false, false, false, ActiveOverlay, NULL }, { "ol", false, true, false, ActiveOverlay, NULL }, { "optgroup", false, false, false, ActiveOverlay, NULL }, { "option", false, false, false, ActiveOverlay, optgroup_select_datalist_0 }, { "output", false, false, false, ActiveOverlay, NULL }, { "p", false, false, false, ActiveOverlay, NULL }, { "param", true, false, false, ActiveOverlay, NULL }, { "pre", false, false, false, ActiveOverlay, NULL }, { "progress", false, false, false, ActiveOverlay, NULL }, { "q", false, false, false, ActiveOverlay, NULL }, { "rp", false, false, false, ActiveOverlay, ruby_0 }, { "rt", false, false, false, ActiveOverlay, ruby_0 }, { "ruby", false, false, false, 
ActiveOverlay, NULL }, { "s", false, false, false, ActiveOverlay, NULL }, { "samp", false, false, false, ActiveOverlay, NULL }, { "script", false, false, true, ActiveOverlay, NULL }, { "section", false, true, false, ActiveOverlay, NULL }, { "select", false, false, false, ActiveOverlay, NULL }, { "small", false, false, false, ActiveOverlay, NULL }, { "source", false, false, false, ActiveOverlay, audio_video_0 }, { "span", false, true, false, ActiveOverlay, NULL }, { "strike", false, false, false, ActiveOverlay, NULL }, { "strong", false, false, false, ActiveOverlay, NULL }, { "style", false, false, false, ActiveOverlay, NULL }, { "sub", false, false, false, ActiveOverlay, NULL }, { "summary", false, false, false, ActiveOverlay, details_0 }, { "sup", false, false, false, ActiveOverlay, NULL }, { "table", false, false, false, ActiveOverlay, NULL }, { "tbody", false, false, false, ActiveOverlay, table_0 }, { "td", false, false, false, ActiveOverlay, tr_0 }, { "textarea", false, false, false, ActiveOverlay, NULL }, { "tfoot", false, false, false, ActiveOverlay, table_0 }, { "th", false, false, false, ActiveOverlay, tr_0 }, { "thead", false, false, false, ActiveOverlay, table_0 }, { "time", false, false, false, ActiveOverlay, NULL }, { "title", false, false, false, ActiveOverlay, head_0 }, { "tr", false, false, false, ActiveOverlay, table_0 }, { "track", true, false, false, ActiveOverlay, audio_video_0 }, { "tt", false, false, false, ActiveOverlay, NULL }, { "u", false, false, false, ActiveOverlay, NULL }, { "ul", false, false, false, ActiveOverlay, NULL }, { "var", false, false, false, ActiveOverlay, NULL }, { "video", false, false, false, ActiveOverlay, NULL }, { "wbr", true, false, false, ActiveOverlay, NULL }, }; static struct _hints html_hints = { "HTML", html_hint_array, sizeof(html_hint_array) / sizeof(*html_hint_array) }; Hints ox_hints_html() { return &html_hints; } Hints ox_hints_dup(Hints h) { Hints nh = ALLOC(struct _hints); nh->hints = ALLOC_N(struct _hint, 
h->size); memcpy(nh->hints, h->hints, sizeof(struct _hint) * h->size); nh->size = h->size; nh->name = h->name; return nh; } void ox_hints_destroy(Hints h) { if (NULL != h && &html_hints != h) { xfree(h->hints); xfree(h); } } Hint ox_hint_find(Hints hints, const char *name) { if (0 != hints) { Hint lo = hints->hints; Hint hi = hints->hints + hints->size - 1; Hint mid; int res; if (0 == (res = strcasecmp(name, lo->name))) { return lo; } else if (0 > res) { return 0; } if (0 == (res = strcasecmp(name, hi->name))) { return hi; } else if (0 < res) { return 0; } while (1 < hi - lo) { mid = lo + (hi - lo) / 2; if (0 == (res = strcasecmp(name, mid->name))) { return mid; } else if (0 < res) { lo = mid; } else { hi = mid; } } } return 0; } ox-2.11.0/ext/ox/extconf.rb0000644000004100000410000000617113502763477015515 0ustar www-datawww-datarequire 'mkmf' extension_name = 'ox' dir_config(extension_name) parts = RUBY_DESCRIPTION.split(' ') type = parts[0].downcase() type = 'ree' if 'ruby' == type && RUBY_DESCRIPTION.include?('Ruby Enterprise Edition') is_windows = RbConfig::CONFIG['host_os'] =~ /(mingw|mswin)/ platform = RUBY_PLATFORM version = RUBY_VERSION.split('.') puts ">>>>> Creating Makefile for #{type} version #{RUBY_VERSION} on #{platform} <<<<<" dflags = { 'RUBY_TYPE' => type, (type.upcase + '_RUBY') => nil, 'RUBY_VERSION' => RUBY_VERSION, 'RUBY_VERSION_MAJOR' => version[0], 'RUBY_VERSION_MINOR' => version[1], 'RUBY_VERSION_MICRO' => version[2], 'HAS_RB_TIME_TIMESPEC' => ('ruby' == type && ('1.9.3' == RUBY_VERSION)) ? 1 : 0, #'HAS_RB_TIME_TIMESPEC' => ('ruby' == type && ('1.9.3' == RUBY_VERSION || '2' <= version[0])) ? 1 : 0, 'HAS_TM_GMTOFF' => ('ruby' == type && (('1' == version[0] && '9' == version[1]) || '2' <= version[0]) && !(platform.include?('cygwin') || platform.include?('solaris') || platform.include?('linux') || RUBY_PLATFORM =~ /(win|w)32$/)) ? 
1 : 0, 'HAS_ENCODING_SUPPORT' => (('ruby' == type || 'rubinius' == type || 'macruby' == type) && (('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0, 'HAS_ONIG' => (('ruby' == type || 'jruby' == type || 'rubinius' == type) && (('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0, 'HAS_PRIVATE_ENCODING' => ('jruby' == type && '1' == version[0] && '9' == version[1]) ? 1 : 0, 'HAS_NANO_TIME' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0, 'HAS_RSTRUCT' => ('ruby' == type || 'ree' == type) ? 1 : 0, 'HAS_IVAR_HELPERS' => ('ruby' == type && !is_windows && (('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0, 'HAS_PROC_WITH_BLOCK' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0, 'HAS_GC_GUARD' => ('jruby' != type && 'rubinius' != type) ? 1 : 0, 'HAS_BIGDECIMAL' => ('jruby' != type) ? 1 : 0, 'HAS_TOP_LEVEL_ST_H' => ('ree' == type || ('ruby' == type && '1' == version[0] && '8' == version[1])) ? 1 : 0, 'NEEDS_UIO' => (RUBY_PLATFORM =~ /(win|w)32$/) ? 0 : 1, 'HAS_DATA_OBJECT_WRAP' => ('ruby' == type && '2' == version[0] && '3' <= version[1]) ? 1 : 0, 'UNIFY_FIXNUM_AND_BIGNUM' => ('ruby' == type && '2' == version[0] && '4' <= version[1]) ? 1 : 0, } if RUBY_PLATFORM =~ /(win|w)32$/ || RUBY_PLATFORM =~ /solaris2\.10/ dflags['NEEDS_STPCPY'] = nil end if ['i386-darwin10.0.0', 'x86_64-darwin10.8.0'].include? RUBY_PLATFORM dflags['NEEDS_STPCPY'] = nil dflags['HAS_IVAR_HELPERS'] = 0 if ('ruby' == type && '1.9.1' == RUBY_VERSION) elsif 'x86_64-linux' == RUBY_PLATFORM && '1.9.3' == RUBY_VERSION && '2011-10-30' == RUBY_RELEASE_DATE begin dflags['NEEDS_STPCPY'] = nil if `more /etc/issue`.include?('CentOS release 5.4') rescue Exception end end dflags.each do |k,v| if v.nil? 
$CPPFLAGS += " -D#{k}" else $CPPFLAGS += " -D#{k}=#{v}" end end $CPPFLAGS += ' -Wall' #puts "*** $CPPFLAGS: #{$CPPFLAGS}" create_makefile(extension_name) %x{make clean} ox-2.11.0/ext/ox/special.h0000644000004100000410000000034313502763477015306 0ustar www-datawww-data/* special.h * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #ifndef OX_SPECIAL_H #define OX_SPECIAL_H #include extern char* ox_ucs_to_utf8_chars(char *text, uint64_t u); #endif /* OX_SPECIAL_H */ ox-2.11.0/ext/ox/helper.h0000644000004100000410000000416713502763477015155 0ustar www-datawww-data/* helper.h * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #ifndef OX_HELPER_H #define OX_HELPER_H #include "type.h" #define HELPER_STACK_INC 16 typedef struct _helper { ID var; /* Object var ID */ VALUE obj; /* object created or Qundef if not appropriate */ Type type; /* type of object in obj */ } *Helper; typedef struct _helperStack { struct _helper base[HELPER_STACK_INC]; Helper head; /* current stack */ Helper end; /* stack end */ Helper tail; /* pointer to one past last element name on stack */ } *HelperStack; inline static void helper_stack_init(HelperStack stack) { stack->head = stack->base; stack->end = stack->base + sizeof(stack->base) / sizeof(struct _helper); stack->tail = stack->head; } inline static int helper_stack_empty(HelperStack stack) { return (stack->head == stack->tail); } inline static int helper_stack_depth(HelperStack stack) { return (int)(stack->tail - stack->head); } inline static void helper_stack_cleanup(HelperStack stack) { if (stack->base != stack->head) { xfree(stack->head); stack->head = stack->base; } } inline static Helper helper_stack_push(HelperStack stack, ID var, VALUE obj, Type type) { if (stack->end <= stack->tail) { size_t len = stack->end - stack->head; size_t toff = stack->tail - stack->head; if (stack->base == stack->head) { stack->head = ALLOC_N(struct _helper, len + HELPER_STACK_INC); memcpy(stack->head, stack->base, sizeof(struct _helper) * 
len); } else { REALLOC_N(stack->head, struct _helper, len + HELPER_STACK_INC); } stack->tail = stack->head + toff; stack->end = stack->head + len + HELPER_STACK_INC; } stack->tail->var = var; stack->tail->obj = obj; stack->tail->type = type; stack->tail++; return stack->tail - 1; } inline static Helper helper_stack_peek(HelperStack stack) { if (stack->head < stack->tail) { return stack->tail - 1; } return 0; } inline static Helper helper_stack_pop(HelperStack stack) { if (stack->head < stack->tail) { stack->tail--; return stack->tail; } return 0; } #endif /* OX_HELPER_H */ ox-2.11.0/ext/ox/cache8.c0000644000004100000410000000411413502763477015014 0ustar www-datawww-data/* cache8.h * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #include #include #include #include #include #include "ruby.h" #include "cache8.h" #define BITS 4 #define MASK 0x000000000000000FULL #define SLOT_CNT 16 #define DEPTH 16 typedef union { struct _cache8 *child; slot_t value; } Bucket; struct _cache8 { Bucket buckets[SLOT_CNT]; }; static void cache8_delete(Cache8 cache, int depth); //static void slot_print(Cache8 cache, sid_t key, unsigned int depth); void ox_cache8_new(Cache8 *cache) { Bucket *b; int i; *cache = ALLOC(struct _cache8); for (i = SLOT_CNT, b = (*cache)->buckets; 0 < i; i--, b++) { b->value = 0; } } void ox_cache8_delete(Cache8 cache) { cache8_delete(cache, 0); } static void cache8_delete(Cache8 cache, int depth) { Bucket *b; unsigned int i; for (i = 0, b = cache->buckets; i < SLOT_CNT; i++, b++) { if (0 != b->child) { if (DEPTH - 1 != depth) { cache8_delete(b->child, depth + 1); } } } xfree(cache); } slot_t ox_cache8_get(Cache8 cache, sid_t key, slot_t **slot) { Bucket *b; int i; sid_t k8 = (sid_t)key; sid_t k; for (i = 64 - BITS; 0 < i; i -= BITS) { k = (k8 >> i) & MASK; b = cache->buckets + k; if (0 == b->child) { ox_cache8_new(&b->child); } cache = b->child; } *slot = &(cache->buckets + (k8 & MASK))->value; return **slot; } #if 0 void ox_cache8_print(Cache8 cache) 
{ //printf("-------------------------------------------\n"); slot_print(cache, 0, 0); } static void slot_print(Cache8 c, sid_t key, unsigned int depth) { Bucket *b; unsigned int i; sid_t k8 = (sid_t)key; sid_t k; for (i = 0, b = c->buckets; i < SLOT_CNT; i++, b++) { if (0 != b->child) { k = (k8 << BITS) | i; //printf("*** key: 0x%016llx depth: %u i: %u\n", k, depth, i); if (DEPTH - 1 == depth) { printf("0x%016llx: %4llu\n", (unsigned long long)k, (unsigned long long)b->value); } else { slot_print(b->child, k, depth + 1); } } } } #endif ox-2.11.0/ext/ox/cache.c0000644000004100000410000001111413502763477014722 0ustar www-datawww-data/* cache.c * Copyright (c) 2011, Peter Ohler * All rights reserved. */ #include #include #include #include #include #include #include #include "cache.h" struct _cache { /* The key is a length byte followed by the key as a string. If the key is longer than 254 characters then the length is 255. The key can be for a premature value and in that case the length byte is greater than the length of the key. */ char *key; VALUE value; struct _cache *slots[16]; }; static void slot_print(Cache cache, unsigned int depth); static char* form_key(const char *s) { size_t len = strlen(s); char *d = ALLOC_N(char, len + 2); *(uint8_t*)d = (255 <= len) ? 
255 : len; memcpy(d + 1, s, len + 1); return d; } void ox_cache_new(Cache *cache) { *cache = ALLOC(struct _cache); (*cache)->key = 0; (*cache)->value = Qundef; memset((*cache)->slots, 0, sizeof((*cache)->slots)); } VALUE ox_cache_get(Cache cache, const char *key, VALUE **slot, const char **keyp) { unsigned char *k = (unsigned char*)key; Cache *cp; for (; '\0' != *k; k++) { cp = cache->slots + (unsigned int)(*k >> 4); /* upper 4 bits */ if (0 == *cp) { ox_cache_new(cp); } cache = *cp; cp = cache->slots + (unsigned int)(*k & 0x0F); /* lower 4 bits */ if (0 == *cp) { /* nothing on this tree so set key and value as a premature key/value pair */ ox_cache_new(cp); cache = *cp; cache->key = form_key(key); break; } else { int depth = (int)(k - (unsigned char*)key + 1); cache = *cp; if ('\0' == *(k + 1)) { /* exact match */ if (0 == cache->key) { /* nothing in this spot so take it */ cache->key = form_key(key); break; } else if ((depth == *cache->key || 255 < depth) && 0 == strcmp(key, cache->key + 1)) { /* match */ break; } else { /* have to move the current premature key/value deeper */ unsigned char *ck = (unsigned char*)(cache->key + depth + 1); Cache orig = *cp; cp = (*cp)->slots + (*ck >> 4); ox_cache_new(cp); cp = (*cp)->slots + (*ck & 0x0F); ox_cache_new(cp); (*cp)->key = cache->key; (*cp)->value = cache->value; orig->key = form_key(key); orig->value = Qundef; } } else { /* not exact match but on the path */ if (0 != cache->key) { /* there is a key/value here already */ if (depth == *cache->key || (255 <= depth && 0 == strncmp(cache->key, key, depth) && '\0' == cache->key[depth])) { /* key belongs here */ continue; } else { unsigned char *ck = (unsigned char*)(cache->key + depth + 1); Cache orig = *cp; cp = (*cp)->slots + (*ck >> 4); ox_cache_new(cp); cp = (*cp)->slots + (*ck & 0x0F); ox_cache_new(cp); (*cp)->key = cache->key; (*cp)->value = cache->value; orig->key = 0; orig->value = Qundef; } } } } } *slot = &cache->value; if (0 != keyp) { if (0 == cache->key) { 
printf("*** Error: failed to set the key for '%s'\n", key); *keyp = 0; } else { *keyp = cache->key + 1; } } return cache->value; } void ox_cache_print(Cache cache) { /*printf("-------------------------------------------\n");*/ slot_print(cache, 0); } static void slot_print(Cache c, unsigned int depth) { char indent[256]; Cache *cp; unsigned int i; if (sizeof(indent) - 1 < depth) { depth = ((int)sizeof(indent) - 1); } memset(indent, ' ', depth); indent[depth] = '\0'; for (i = 0, cp = c->slots; i < 16; i++, cp++) { if (0 == *cp) { /*printf("%s%02u:\n", indent, i);*/ } else { if (0 == (*cp)->key && Qundef == (*cp)->value) { printf("%s%02u:\n", indent, i); } else { const char *vs; const char *clas; if (Qundef == (*cp)->value) { vs = "undefined"; clas = ""; } else { VALUE rs = rb_funcall2((*cp)->value, rb_intern("to_s"), 0, 0); vs = StringValuePtr(rs); clas = rb_class2name(rb_obj_class((*cp)->value)); } printf("%s%02u: %s = %s (%s)\n", indent, i, (*cp)->key, vs, clas); } slot_print(*cp, depth + 2); } } } ox-2.11.0/ext/ox/buf.h0000644000004100000410000001025713502763477014447 0ustar www-datawww-data/* buf.h * Copyright (c) 2014, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef OX_BUF_H #define OX_BUF_H #include #include typedef struct _buf { char *head; char *end; char *tail; int fd; bool err; char base[16384]; } *Buf; inline static void buf_init(Buf buf, int fd, long initial_size) { if (sizeof(buf->base) < (size_t)initial_size) { buf->head = ALLOC_N(char, initial_size); buf->end = buf->head + initial_size - 1; } else { buf->head = buf->base; buf->end = buf->base + sizeof(buf->base) - 1; } buf->tail = buf->head; buf->fd = fd; buf->err = false; } inline static void buf_reset(Buf buf) { buf->head = buf->base; buf->tail = buf->head; } inline static void buf_cleanup(Buf buf) { if (buf->base != buf->head) { free(buf->head); } } inline static size_t buf_len(Buf buf) { return buf->tail - buf->head; } inline static void buf_append_string(Buf buf, const char *s, size_t slen) { if (buf->err) { return; } if (buf->end <= buf->tail + slen) { if (0 != buf->fd) { size_t len = buf->tail - buf->head; if (len != (size_t)write(buf->fd, buf->head, len)) { buf->err = true; } buf->tail = buf->head; } else { size_t len = buf->end - buf->head; size_t toff = buf->tail - buf->head; size_t new_len = len + slen + len / 2; if (buf->base == buf->head) { buf->head = ALLOC_N(char, new_len); 
memcpy(buf->head, buf->base, len); } else { REALLOC_N(buf->head, char, new_len); } buf->tail = buf->head + toff; buf->end = buf->head + new_len - 2; } } if (0 < slen) { memcpy(buf->tail, s, slen); } buf->tail += slen; } inline static void buf_append(Buf buf, char c) { if (buf->err) { return; } if (buf->end <= buf->tail) { if (0 != buf->fd) { size_t len = buf->tail - buf->head; if (len != (size_t)write(buf->fd, buf->head, len)) { buf->err = true; } buf->tail = buf->head; } else { size_t len = buf->end - buf->head; size_t toff = buf->tail - buf->head; size_t new_len = len + len / 2; if (buf->base == buf->head) { buf->head = ALLOC_N(char, new_len); memcpy(buf->head, buf->base, len); } else { REALLOC_N(buf->head, char, new_len); } buf->tail = buf->head + toff; buf->end = buf->head + new_len - 2; } } *buf->tail++ = c; //*buf->tail = '\0'; // for debugging } inline static void buf_finish(Buf buf) { if (buf->err) { return; } if (0 != buf->fd) { size_t len = buf->tail - buf->head; if (0 < len && len != (size_t)write(buf->fd, buf->head, len)) { buf->err = true; } fsync(buf->fd); buf->tail = buf->head; } } #endif /* OX_BUF_H */ ox-2.11.0/ext/ox/parse.c0000644000004100000410000006522313502763477015003 0ustar www-datawww-data/* parse.c * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #include #include #include #include #include "ruby.h" #include "ox.h" #include "err.h" #include "attr.h" #include "helper.h" #include "special.h" static void read_instruction(PInfo pi); static void read_doctype(PInfo pi); static void read_comment(PInfo pi); static char* read_element(PInfo pi); static void read_text(PInfo pi); /*static void read_reduced_text(PInfo pi); */ static void read_cdata(PInfo pi); static char* read_name_token(PInfo pi); static char* read_quoted_value(PInfo pi); static char* read_hex_uint64(char *b, uint64_t *up); static char* read_10_uint64(char *b, uint64_t *up); static char* read_coded_chars(PInfo pi, char *text); static void next_non_white(PInfo pi); static int collapse_special(PInfo pi, char *str); /* This XML parser is a single pass, destructive, callback parser. It is a * single pass parse since it only make one pass over the characters in the * XML document string. It is destructive because it re-uses the content of * the string for values in the callback and places \0 characters at various * places to mark the end of tokens and strings. It is a callback parser like * a SAX parser because it uses callback when document elements are * encountered. * * Parsing is very tolerant. Lack of headers and even mispelled element * endings are passed over without raising an error. A best attempt is made in * all cases to parse the string. 
*/ static char xml_valid_lower_chars[34] = "xxxxxxxxxooxxoxxxxxxxxxxxxxxxxxxo"; inline static int is_white(char c) { switch (c) { case ' ': case '\t': case '\f': case '\n': case '\r': return 1; default: return 0; } } inline static void next_non_white(PInfo pi) { for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '\n': case '\r': break; default: return; } } } inline static void next_white(PInfo pi) { for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '\n': case '\r': case '\0': return; default: break; } } } static void mark_pi_cb(void *ptr) { if (NULL != ptr) { HelperStack stack = &((PInfo)ptr)->helpers; Helper h; for (h = stack->head; h < stack->tail; h++) { if (NoCode != h->type) { rb_gc_mark(h->obj); } } } } VALUE ox_parse(char *xml, size_t len, ParseCallbacks pcb, char **endp, Options options, Err err) { struct _pInfo pi; int body_read = 0; int block_given = rb_block_given_p(); volatile VALUE wrap; if (0 == xml) { set_error(err, "Invalid arg, xml string can not be null", xml, 0); return Qnil; } if (DEBUG <= options->trace) { printf("Parsing xml:\n%s\n", xml); } /* initialize parse info */ helper_stack_init(&pi.helpers); // Protect against GC wrap = Data_Wrap_Struct(rb_cObject, mark_pi_cb, NULL, &pi); err_init(&pi.err); pi.str = xml; pi.end = pi.str + len; pi.s = xml; pi.pcb = pcb; pi.obj = Qnil; pi.circ_array = 0; pi.options = options; while (1) { next_non_white(&pi); /* skip white space */ if ('\0' == *pi.s) { break; } if (body_read && 0 != endp) { *endp = pi.s; break; } if ('<' != *pi.s) { /* all top level entities start with < */ set_error(err, "invalid format, expected <", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; } pi.s++; /* past < */ switch (*pi.s) { case '?': /* processing instruction */ pi.s++; read_instruction(&pi); break; case '!': /* comment or doctype */ pi.s++; if ('\0' == *pi.s) { set_error(err, "invalid format, DOCTYPE or comment not terminated", pi.str, pi.s); 
helper_stack_cleanup(&pi.helpers); return Qnil; } else if ('-' == *pi.s) { pi.s++; /* skip - */ if ('-' != *pi.s) { set_error(err, "invalid format, bad comment format", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; } else { pi.s++; /* skip second - */ read_comment(&pi); } } else if ((TolerantEffort == options->effort) ? 0 == strncasecmp("DOCTYPE", pi.s, 7) : 0 == strncmp("DOCTYPE", pi.s, 7)) { pi.s += 7; read_doctype(&pi); } else { set_error(err, "invalid format, DOCTYPE or comment expected", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; } break; case '\0': set_error(err, "invalid format, document not terminated", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; default: read_element(&pi); body_read = 1; break; } if (err_has(&pi.err)) { *err = pi.err; helper_stack_cleanup(&pi.helpers); return Qnil; } if (block_given && Qnil != pi.obj && Qundef != pi.obj) { if (NULL != pcb->finish) { pcb->finish(&pi); } rb_yield(pi.obj); } } DATA_PTR(wrap) = NULL; helper_stack_cleanup(&pi.helpers); if (NULL != pcb->finish) { pcb->finish(&pi); } return pi.obj; } static char* gather_content(const char *src, char *content, size_t len) { for (; 0 < len; src++, content++, len--) { switch (*src) { case '?': if ('>' == *(src + 1)) { *content = '\0'; return (char*)(src + 1); } *content = *src; break; case '\0': return 0; default: *content = *src; break; } } return 0; } /* Entered after the "s; if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) { set_error(&pi->err, "processing instruction content too large or not terminated", pi->str, pi->s); return; } next_non_white(pi); c = *pi->s; *end = '\0'; /* terminate name */ if ('?' != c) { while ('?' 
!= c) { pi->last = 0; if ('\0' == *pi->s) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, processing instruction not terminated", pi->str, pi->s); return; } next_non_white(pi); if (0 == (attr_name = read_name_token(pi))) { attr_stack_cleanup(&attrs); return; } end = pi->s; next_non_white(pi); if ('=' != *pi->s++) { attrs_ok = 0; break; } *end = '\0'; /* terminate name */ /* read value */ next_non_white(pi); if (0 == (attr_value = read_quoted_value(pi))) { attr_stack_cleanup(&attrs); return; } attr_stack_push(&attrs, attr_name, attr_value); next_non_white(pi); if ('\0' == pi->last) { c = *pi->s; } else { c = pi->last; } } if ('?' == *pi->s) { pi->s++; } } else { pi->s++; } if (attrs_ok) { if ('>' != *pi->s++) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, processing instruction not terminated", pi->str, pi->s); return; } } else { pi->s = cend + 1; } if (0 != pi->pcb->instruct) { if (attrs_ok) { pi->pcb->instruct(pi, target, attrs.head, 0); } else { pi->pcb->instruct(pi, target, attrs.head, content); } } attr_stack_cleanup(&attrs); } static void read_delimited(PInfo pi, char end) { char c; if ('"' == end || '\'' == end) { for (c = *pi->s++; end != c; c = *pi->s++) { if ('\0' == c) { set_error(&pi->err, "invalid format, dectype not terminated", pi->str, pi->s); return; } } } else { while (1) { c = *pi->s++; if (end == c) { return; } switch (c) { case '\0': set_error(&pi->err, "invalid format, dectype not terminated", pi->str, pi->s); return; case '"': read_delimited(pi, c); break; case '\'': read_delimited(pi, c); break; case '[': read_delimited(pi, ']'); break; case '<': read_delimited(pi, '>'); break; default: break; } } } } /* Entered after the "s; read_delimited(pi, '>'); if (err_has(&pi->err)) { return; } pi->s--; *pi->s = '\0'; pi->s++; if (0 != pi->pcb->add_doctype) { pi->pcb->add_doctype(pi, docType); } } /* Entered after ""); if (0 == end) { set_error(&pi->err, "invalid format, comment not terminated", pi->str, pi->s); 
return; } for (s = end - 1; pi->s < s && !done; s--) { switch(*s) { case ' ': case '\t': case '\f': case '\n': case '\r': break; default: *(s + 1) = '\0'; done = 1; break; } } *end = '\0'; /* in case the comment was blank */ pi->s = end + 3; if (0 != pi->pcb->add_comment) { pi->pcb->add_comment(pi, comment); } } /* Entered after the '<' and the first character after that. Returns status * code. */ static char* read_element(PInfo pi) { struct _attrStack attrs; const char *attr_name; const char *attr_value; char *name; char *ename; char *end; char c; long elen; int hasChildren = 0; int done = 0; attr_stack_init(&attrs); if (0 == (ename = read_name_token(pi))) { return 0; } end = pi->s; elen = end - ename; next_non_white(pi); c = *pi->s; *end = '\0'; if ('/' == c) { /* empty element, no attributes and no children */ pi->s++; if ('>' != *pi->s) { /*printf("*** '%s' ***\n", pi->s); */ attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s); return 0; } pi->s++; /* past > */ pi->pcb->add_element(pi, ename, attrs.head, hasChildren); pi->pcb->end_element(pi, ename); attr_stack_cleanup(&attrs); return 0; } /* read attribute names until the close (/ or >) is reached */ while (!done) { if ('\0' == c) { if (pi->end <= pi->s) { break; } next_non_white(pi); c = *pi->s; } pi->last = 0; switch (c) { case '\0': attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; case '/': /* Element with just attributes. 
*/ pi->s++; if ('>' != *pi->s) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s); return 0; } pi->s++; pi->pcb->add_element(pi, ename, attrs.head, hasChildren); pi->pcb->end_element(pi, ename); attr_stack_cleanup(&attrs); return 0; case '>': /* has either children or a value */ pi->s++; hasChildren = 1; done = 1; pi->pcb->add_element(pi, ename, attrs.head, hasChildren); break; default: /* Attribute name so it's an element and the attribute will be */ /* added to it. */ if (0 == (attr_name = read_name_token(pi))) { attr_stack_cleanup(&attrs); return 0; } end = pi->s; next_non_white(pi); if ('=' != *pi->s++) { if (TolerantEffort == pi->options->effort) { pi->s--; pi->last = *pi->s; *end = '\0'; /* terminate name */ attr_value = ""; attr_stack_push(&attrs, attr_name, attr_value); break; } else { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, no attribute value", pi->str, pi->s); return 0; } } *end = '\0'; /* terminate name */ /* read value */ next_non_white(pi); if (0 == (attr_value = read_quoted_value(pi))) { return 0; } if (pi->options->convert_special && 0 != strchr(attr_value, '&')) { if (0 != collapse_special(pi, (char*)attr_value) || err_has(&pi->err)) { attr_stack_cleanup(&attrs); return 0; } } attr_stack_push(&attrs, attr_name, attr_value); break; } if ('\0' == pi->last) { c = '\0'; } else { c = pi->last; pi->last = '\0'; } } if (hasChildren) { char *start; int first = 1; done = 0; /* read children */ while (!done) { start = pi->s; next_non_white(pi); c = *pi->s++; if ('\0' == c) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; } if ('<' == c) { char *slash; switch (*pi->s) { case '!': /* better be a comment or CDATA */ pi->s++; if ('-' == *pi->s && '-' == *(pi->s + 1)) { pi->s += 2; read_comment(pi); } else if ((TolerantEffort == pi->options->effort) ? 
0 == strncasecmp("[CDATA[", pi->s, 7) : 0 == strncmp("[CDATA[", pi->s, 7)) { pi->s += 7; read_cdata(pi); } else { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, invalid comment or CDATA format", pi->str, pi->s); return 0; } break; case '?': /* processing instruction */ pi->s++; read_instruction(pi); break; case '/': slash = pi->s; pi->s++; if (0 == (name = read_name_token(pi))) { attr_stack_cleanup(&attrs); return 0; } end = pi->s; next_non_white(pi); c = *pi->s; *end = '\0'; if (0 != ((TolerantEffort == pi->options->effort) ? strcasecmp(name, ename) : strcmp(name, ename))) { attr_stack_cleanup(&attrs); if (TolerantEffort == pi->options->effort) { pi->pcb->end_element(pi, ename); return name; } else { set_error(&pi->err, "invalid format, elements overlap", pi->str, pi->s); return 0; } } if ('>' != c) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s); return 0; } if (first && start != slash - 1) { // Some white space between start and here so add as // text after checking skip. *(slash - 1) = '\0'; switch (pi->options->skip) { case CrSkip: { char *s = start; char *e = start; for (; '\0' != *e; e++) { if ('\r' != *e) { *s++ = *e; } } *s = '\0'; break; } case SpcSkip: *start = '\0'; break; case NoSkip: case OffSkip: default: break; } if ('\0' != *start) { pi->pcb->add_text(pi, start, 1); } } pi->s++; pi->pcb->end_element(pi, ename); attr_stack_cleanup(&attrs); return 0; case '\0': attr_stack_cleanup(&attrs); if (TolerantEffort == pi->options->effort) { return 0; } else { set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; } default: first = 0; /* a child element */ // Child closed with mismatched name. if (0 != (name = read_element(pi))) { attr_stack_cleanup(&attrs); if (0 == ((TolerantEffort == pi->options->effort) ? 
strcasecmp(name, ename) : strcmp(name, ename))) { pi->s++; pi->pcb->end_element(pi, ename); return 0; } else { // not the correct element yet pi->pcb->end_element(pi, ename); return name; } } else if (err_has(&pi->err)) { return 0; } break; } } else { /* read as TEXT */ pi->s = start; /*pi->s--; */ read_text(pi); /*read_reduced_text(pi); */ /* to exit read_text with no errors the next character must be < */ if ('/' == *(pi->s + 1) && 0 == ((TolerantEffort == pi->options->effort) ? strncasecmp(ename, pi->s + 2, elen) : strncmp(ename, pi->s + 2, elen)) && '>' == *(pi->s + elen + 2)) { /* close tag after text so treat as a value */ pi->s += elen + 3; pi->pcb->end_element(pi, ename); attr_stack_cleanup(&attrs); return 0; } } } } attr_stack_cleanup(&attrs); return 0; } static void read_text(PInfo pi) { char buf[MAX_TEXT_LEN]; char *b = buf; char *alloc_buf = 0; char *end = b + sizeof(buf) - 2; char c; int done = 0; while (!done) { c = *pi->s++; switch(c) { case '<': done = 1; pi->s--; break; case '\0': set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return; default: if (end <= (b + (('&' == c) ? 
7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */ unsigned long size; if (0 == alloc_buf) { size = sizeof(buf) * 2; alloc_buf = ALLOC_N(char, size); memcpy(alloc_buf, buf, b - buf); b = alloc_buf + (b - buf); } else { unsigned long pos = b - alloc_buf; size = (end - alloc_buf) * 2; REALLOC_N(alloc_buf, char, size); b = alloc_buf + pos; } end = alloc_buf + size - 2; } if ('&' == c) { if (0 == (b = read_coded_chars(pi, b))) { return; } } else { if (0 <= c && c <= 0x20) { if (StrictEffort == pi->options->effort && 'x' == xml_valid_lower_chars[(unsigned char)c]) { set_error(&pi->err, "invalid character", pi->str, pi->s); return; } switch (pi->options->skip) { case CrSkip: if (buf != b && '\n' == c && '\r' == *(b - 1)) { *(b - 1) = '\n'; } else { *b++ = c; } break; case SpcSkip: if (is_white(c)) { if (buf == b || ' ' != *(b - 1)) { *b++ = ' '; } } else { *b++ = c; } break; case NoSkip: case OffSkip: default: *b++ = c; break; } } else { *b++ = c; } } break; } } *b = '\0'; if (0 != alloc_buf) { pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1))); xfree(alloc_buf); } else { pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1))); } } #if 0 static void read_reduced_text(PInfo pi) { char buf[MAX_TEXT_LEN]; char *b = buf; char *alloc_buf = 0; char *end = b + sizeof(buf) - 2; char c; int spc = 0; int done = 0; while (!done) { c = *pi->s++; switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': spc = 1; break; case '<': done = 1; pi->s--; break; case '\0': set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return; default: if (end <= (b + spc + (('&' == c) ? 
7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */ unsigned long size; if (0 == alloc_buf) { size = sizeof(buf) * 2; alloc_buf = ALLOC_N(char, size); memcpy(alloc_buf, buf, b - buf); b = alloc_buf + (b - buf); } else { unsigned long pos = b - alloc_buf; size = (end - alloc_buf) * 2; REALLOC(alloc_buf, char, size); b = alloc_buf + pos; } end = alloc_buf + size - 2; } if (spc) { *b++ = ' '; } spc = 0; if ('&' == c) { if (0 == (b = read_coded_chars(pi, b))) { return; } } else { *b++ = c; } break; } } *b = '\0'; if (0 != alloc_buf) { pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1))); xfree(alloc_buf); } else { pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1))); } } #endif static char* read_name_token(PInfo pi) { char *start; next_non_white(pi); start = pi->s; for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '?': case '=': case '/': case '>': case '\n': case '\r': return start; case '\0': /* documents never terminate after a name token */ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; break; /* to avoid warnings */ case ':': if ('\0' == *pi->options->strip_ns) { break; } else if ('*' == *pi->options->strip_ns && '\0' == pi->options->strip_ns[1]) { start = pi->s + 1; } else if (0 == strncmp(pi->options->strip_ns, start, pi->s - start)) { start = pi->s + 1; } break; default: break; } } return start; } static void read_cdata(PInfo pi) { char *start; char *end; start = pi->s; end = strstr(pi->s, "]]>"); if (end == 0) { set_error(&pi->err, "invalid format, CDATA not terminated", pi->str, pi->s); return; } *end = '\0'; pi->s = end + 3; if (0 != pi->pcb->add_cdata) { pi->pcb->add_cdata(pi, start, end - start); } } /* Assume the value starts immediately and goes until the quote character is * reached again. Do not read the character after the terminating quote. 
*/ static char* read_quoted_value(PInfo pi) { char *value = 0; if ('"' == *pi->s || '\'' == *pi->s) { char term = *pi->s; pi->s++; /* skip quote character */ value = pi->s; for (; *pi->s != term; pi->s++) { if ('\0' == *pi->s) { set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; } } *pi->s = '\0'; /* terminate value */ pi->s++; /* move past quote */ } else if (StrictEffort == pi->options->effort) { set_error(&pi->err, "invalid format, expected a quote character", pi->str, pi->s); return 0; } else if (TolerantEffort == pi->options->effort) { value = pi->s; for (; 1; pi->s++) { switch (*pi->s) { case '\0': set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; case ' ': case '/': case '>': case '?': // for instructions case '\t': case '\n': case '\r': pi->last = *pi->s; *pi->s = '\0'; /* terminate value */ pi->s++; return value; default: break; } } } else { value = pi->s; next_white(pi); if ('\0' == *pi->s) { set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; } *pi->s++ = '\0'; /* terminate value */ } return value; } static char* read_hex_uint64(char *b, uint64_t *up) { uint64_t u = 0; char c; for (; ';' != *b; b++) { c = *b; if ('0' <= c && c <= '9') { u = (u << 4) | (uint64_t)(c - '0'); } else if ('a' <= c && c <= 'f') { u = (u << 4) | (uint64_t)(c - 'a' + 10); } else if ('A' <= c && c <= 'F') { u = (u << 4) | (uint64_t)(c - 'A' + 10); } else { return 0; } } *up = u; return b; } static char* read_10_uint64(char *b, uint64_t *up) { uint64_t u = 0; char c; for (; ';' != *b; b++) { c = *b; if ('0' <= c && c <= '9') { u = (u * 10) + (uint64_t)(c - '0'); } else { return 0; } } *up = u; return b; } static char* read_coded_chars(PInfo pi, char *text) { char *b, buf[32]; char *end = buf + sizeof(buf) - 1; char *s; for (b = buf, s = pi->s; b < end; b++, s++) { *b = *s; if (';' == *s) { *(b + 1) = '\0'; s++; break; } } if (b > end) { *text++ = '&'; } else if ('#' 
== *buf) { uint64_t u = 0; b = buf + 1; if ('x' == *b || 'X' == *b) { b = read_hex_uint64(b + 1, &u); } else { b = read_10_uint64(b, &u); } if (0 == b) { *text++ = '&'; } else { if (u <= 0x000000000000007FULL) { *text++ = (char)u; #if HAS_PRIVATE_ENCODING } else if (ox_utf8_encoding == pi->options->rb_enc || 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) { #else } else if (ox_utf8_encoding == pi->options->rb_enc) { #endif text = ox_ucs_to_utf8_chars(text, u); #if HAS_PRIVATE_ENCODING } else if (Qnil == pi->options->rb_enc) { #else } else if (0 == pi->options->rb_enc) { #endif pi->options->rb_enc = ox_utf8_encoding; text = ox_ucs_to_utf8_chars(text, u); } else if (TolerantEffort == pi->options->effort) { *text++ = '&'; return text; } else if (u <= 0x00000000000000FFULL) { *text++ = (char)u; } else { /*set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */ set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); return 0; } pi->s = s; } } else if (0 == strcasecmp(buf, "nbsp;")) { pi->s = s; *text++ = ' '; } else if (0 == strcasecmp(buf, "lt;")) { pi->s = s; *text++ = '<'; } else if (0 == strcasecmp(buf, "gt;")) { pi->s = s; *text++ = '>'; } else if (0 == strcasecmp(buf, "amp;")) { pi->s = s; *text++ = '&'; } else if (0 == strcasecmp(buf, "quot;")) { pi->s = s; *text++ = '"'; } else if (0 == strcasecmp(buf, "apos;")) { pi->s = s; *text++ = '\''; } else { *text++ = '&'; } return text; } static int collapse_special(PInfo pi, char *str) { char *s = str; char *b = str; while ('\0' != *s) { if ('&' == *s) { int c; char *end; s++; if ('#' == *s) { uint64_t u = 0; char x; s++; if ('x' == *s || 'X' == *s) { x = *s; s++; end = read_hex_uint64(s, &u); } else { x = '\0'; end = read_10_uint64(s, &u); } if (0 == end) { if (TolerantEffort == pi->options->effort) { *b++ = '&'; *b++ = '#'; if ('\0' 
!= x) { *b++ = x; } continue; } return EDOM; } if (u <= 0x000000000000007FULL) { *b++ = (char)u; #if HAS_PRIVATE_ENCODING } else if (ox_utf8_encoding == pi->options->rb_enc || 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) { #else } else if (ox_utf8_encoding == pi->options->rb_enc) { #endif b = ox_ucs_to_utf8_chars(b, u); /* TBD support UTF-16 */ #if HAS_PRIVATE_ENCODING } else if (Qnil == pi->options->rb_enc) { #else } else if (0 == pi->options->rb_enc) { #endif pi->options->rb_enc = ox_utf8_encoding; b = ox_ucs_to_utf8_chars(b, u); } else { /* set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/ set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); return 0; } s = end + 1; } else { if (0 == strncasecmp(s, "lt;", 3)) { c = '<'; s += 3; } else if (0 == strncasecmp(s, "gt;", 3)) { c = '>'; s += 3; } else if (0 == strncasecmp(s, "amp;", 4)) { c = '&'; s += 4; } else if (0 == strncasecmp(s, "quot;", 5)) { c = '"'; s += 5; } else if (0 == strncasecmp(s, "apos;", 5)) { c = '\''; s += 5; } else if (TolerantEffort == pi->options->effort) { *b++ = '&'; continue; } else { c = '?'; while (';' != *s++) { if ('\0' == *s) { set_error(&pi->err, "Invalid format, special character does not end with a semicolon", pi->str, pi->s); return EDOM; } } s++; set_error(&pi->err, "Invalid format, invalid special character sequence", pi->str, pi->s); return 0; } *b++ = (char)c; } } else { *b++ = *s++; } } *b = '\0'; return 0; } ox-2.11.0/ext/ox/sax_has.h0000644000004100000410000000316313502763477015317 0ustar www-datawww-data/* sax_has.h * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #ifndef OX_SAX_HAS_H #define OX_SAX_HAS_H typedef struct _has { int instruct; int end_instruct; int attr; int attrs_done; int attr_value; int doctype; int comment; int cdata; int text; int value; int start_element; int end_element; int error; int pos; int line; int column; } *Has; inline static int respond_to(VALUE obj, ID method) { return rb_respond_to(obj, method); } inline static void has_init(Has has, VALUE handler) { has->instruct = respond_to(handler, ox_instruct_id); has->end_instruct = respond_to(handler, ox_end_instruct_id); has->attr = respond_to(handler, ox_attr_id); has->attr_value = respond_to(handler, ox_attr_value_id); has->attrs_done = respond_to(handler, ox_attrs_done_id); has->doctype = respond_to(handler, ox_doctype_id); has->comment = respond_to(handler, ox_comment_id); has->cdata = respond_to(handler, ox_cdata_id); has->text = respond_to(handler, ox_text_id); has->value = respond_to(handler, ox_value_id); has->start_element = respond_to(handler, ox_start_element_id); has->end_element = respond_to(handler, ox_end_element_id); has->error = respond_to(handler, ox_error_id); has->pos = (Qtrue == rb_ivar_defined(handler, ox_at_pos_id)); has->line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id)); has->column = (Qtrue == rb_ivar_defined(handler, ox_at_column_id)); } #endif /* OX_SAX_HAS_H */ ox-2.11.0/ext/ox/attr.h0000644000004100000410000000357313502763477014650 0ustar www-datawww-data/* attr.h * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #ifndef OX_ATTR_H #define OX_ATTR_H #include "ox.h" #define ATTR_STACK_INC 8 typedef struct _attr { const char *name; const char *value; } *Attr; typedef struct _attrStack { struct _attr base[ATTR_STACK_INC]; Attr head; /* current stack */ Attr end; /* stack end */ Attr tail; /* pointer to one past last element name on stack */ } *AttrStack; inline static void attr_stack_init(AttrStack stack) { stack->head = stack->base; stack->end = stack->base + sizeof(stack->base) / sizeof(struct _attr); stack->tail = stack->head; stack->head->name = 0; } inline static int attr_stack_empty(AttrStack stack) { return (stack->head == stack->tail); } inline static void attr_stack_cleanup(AttrStack stack) { if (stack->base != stack->head) { xfree(stack->head); stack->head = stack->base; } } inline static void attr_stack_push(AttrStack stack, const char *name, const char *value) { if (stack->end <= stack->tail + 1) { size_t len = stack->end - stack->head; size_t toff = stack->tail - stack->head; if (stack->base == stack->head) { stack->head = ALLOC_N(struct _attr, len + ATTR_STACK_INC); memcpy(stack->head, stack->base, sizeof(struct _attr) * len); } else { REALLOC_N(stack->head, struct _attr, len + ATTR_STACK_INC); } stack->tail = stack->head + toff; stack->end = stack->head + len + ATTR_STACK_INC; } stack->tail->name = name; stack->tail->value = value; stack->tail++; stack->tail->name = 0; // terminate } inline static Attr attr_stack_peek(AttrStack stack) { if (stack->head < stack->tail) { return stack->tail - 1; } return 0; } inline static Attr attr_stack_pop(AttrStack stack) { if (stack->head < stack->tail) { stack->tail--; return stack->tail; } return 0; } #endif /* OX_ATTR_H */ ox-2.11.0/ext/ox/sax.c0000644000004100000410000013165013502763477014462 0ustar www-datawww-data/* sax.c * Copyright (c) 2011, Peter Ohler * All rights reserved. 
*/ #include #include #include #include #include #include #if NEEDS_UIO #include #endif #include #include #include "ruby.h" #include "ox.h" #include "sax.h" #include "sax_stack.h" #include "sax_buf.h" #include "special.h" #define NAME_MISMATCH 1 #define START_STATE 1 #define BODY_STATE 2 #define AFTER_STATE 3 // error prefixes #define BAD_BOM "Bad BOM: " #define NO_TERM "Not Terminated: " #define INVALID_FORMAT "Invalid Format: " #define CASE_ERROR "Case Error: " #define OUT_OF_ORDER "Out of Order: " #define WRONG_CHAR "Unexpected Character: " #define EL_MISMATCH "Start End Mismatch: " #define INV_ELEMENT "Invalid Element: " #define UTF8_STR "UTF-8" static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options); static void parse(SaxDrive dr); // All read functions should return the next character after the 'thing' that was read and leave dr->cur one after that. static char read_instruction(SaxDrive dr); static char read_doctype(SaxDrive dr); static char read_cdata(SaxDrive dr); static char read_comment(SaxDrive dr); static char read_element_start(SaxDrive dr); static char read_element_end(SaxDrive dr); static char read_text(SaxDrive dr); static char read_jump(SaxDrive dr, const char *pat); static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h); static char read_name_token(SaxDrive dr); static char read_quoted_value(SaxDrive dr); static void end_element_cb(SaxDrive dr, VALUE name, int pos, int line, int col, Hint h); static void hint_clear_empty(SaxDrive dr); static Nv hint_try_close(SaxDrive dr, const char *name); VALUE ox_sax_value_class = Qnil; static VALUE protect_parse(VALUE drp) { parse((SaxDrive)drp); return Qnil; } #if HAS_ENCODING_SUPPORT || HAS_PRIVATE_ENCODING static int strIsAscii(const char *s) { for (; '\0' != *s; s++) { if (*s < ' ' || '~' < *s) { return 0; } } return 1; } #endif VALUE str2sym(SaxDrive dr, const char *str, const char **strp) { VALUE *slot; VALUE sym; if 
(dr->options.symbolize) { if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) { #if HAS_ENCODING_SUPPORT if (0 != dr->encoding && !strIsAscii(str)) { VALUE rstr = rb_str_new2(str); // TBD if sym can be pinned down then use this all the time rb_enc_associate(rstr, dr->encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); *slot = Qundef; } else { sym = ID2SYM(rb_intern(str)); *slot = sym; } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding && !strIsAscii(str)) { VALUE rstr = rb_str_new2(str); rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); // Needed for Ruby 2.2 to get around the GC of symbols created // with to_sym which is needed for encoded symbols. rb_ary_push(ox_sym_bank, sym); *slot = Qundef; } else { sym = ID2SYM(rb_intern(str)); *slot = sym; } #else sym = ID2SYM(rb_intern(str)); *slot = sym; #endif } } else { sym = rb_str_new2(str); #if HAS_ENCODING_SUPPORT if (0 != dr->encoding) { rb_enc_associate(sym, dr->encoding); } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding) { rb_funcall(sym, ox_force_encoding_id, 1, dr->encoding); } #endif if (0 != strp) { *strp = StringValuePtr(sym); } } return sym; } void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) { struct _saxDrive dr; int line = 0; sax_drive_init(&dr, handler, io, options); #if 0 printf("*** sax_parse with these flags\n"); printf(" has_instruct = %s\n", dr.has.instruct ? "true" : "false"); printf(" has_end_instruct = %s\n", dr.has.end_instruct ? "true" : "false"); printf(" has_attr = %s\n", dr.has.attr ? "true" : "false"); printf(" has_attr_value = %s\n", dr.has.attr_value ? "true" : "false"); printf(" has_attrs_done = %s\n", dr.has.attrs_done ? "true" : "false"); printf(" has_doctype = %s\n", dr.has.doctype ? "true" : "false"); printf(" has_comment = %s\n", dr.has.comment ? "true" : "false"); printf(" has_cdata = %s\n", dr.has.cdata ? "true" : "false"); printf(" has_text = %s\n", dr.has.text ? 
"true" : "false"); printf(" has_value = %s\n", dr.has.value ? "true" : "false"); printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false"); printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false"); printf(" has_error = %s\n", dr.has.error ? "true" : "false"); printf(" has_pos = %s\n", dr.has.pos ? "true" : "false"); printf(" has_line = %s\n", dr.has.line ? "true" : "false"); printf(" has_column = %s\n", dr.has.column ? "true" : "false"); #endif //parse(&dr); rb_protect(protect_parse, (VALUE)&dr, &line); ox_sax_drive_cleanup(&dr); if (0 != line) { rb_jump_tag(line); } } static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) { ox_sax_buf_init(&dr->buf, io); dr->buf.dr = dr; stack_init(&dr->stack); dr->handler = handler; dr->value_obj = Data_Wrap_Struct(ox_sax_value_class, 0, 0, dr); rb_gc_register_address(&dr->value_obj); dr->options = *options; dr->err = 0; dr->blocked = 0; dr->abort = false; has_init(&dr->has, handler); #if HAS_ENCODING_SUPPORT if ('\0' == *ox_default_options.encoding) { VALUE encoding; dr->encoding = 0; if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) { int e = rb_enc_get_index(encoding); if (0 <= e) { dr->encoding = rb_enc_from_index(e); } } } else { dr->encoding = rb_enc_find(ox_default_options.encoding); } #elif HAS_PRIVATE_ENCODING if ('\0' == *ox_default_options.encoding) { VALUE encoding; if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) { dr->encoding = encoding; } else { dr->encoding = Qnil; } } else { dr->encoding = rb_str_new2(ox_default_options.encoding); } #else dr->encoding = 0; #endif } void ox_sax_drive_cleanup(SaxDrive dr) { rb_gc_unregister_address(&dr->value_obj); buf_cleanup(&dr->buf); stack_cleanup(&dr->stack); } static void ox_sax_drive_error_at(SaxDrive dr, const char *msg, int pos, int line, int col) { if (dr->has.error) { VALUE 
args[3]; args[0] = rb_str_new2(msg); args[1] = LONG2NUM(line); args[2] = LONG2NUM(col); if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); } if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); } if (dr->has.line) { rb_ivar_set(dr->handler, ox_at_line_id, args[1]); } if (dr->has.column) { rb_ivar_set(dr->handler, ox_at_column_id, args[2]); } rb_funcall2(dr->handler, ox_error_id, 3, args); } } void ox_sax_drive_error(SaxDrive dr, const char *msg) { ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col); } static char skipBOM(SaxDrive dr) { char c = buf_get(&dr->buf); if (0xEF == (uint8_t)c) { /* only UTF8 is supported */ if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) { #if HAS_ENCODING_SUPPORT dr->encoding = ox_utf8_encoding; #elif HAS_PRIVATE_ENCODING dr->encoding = ox_utf8_encoding; #else dr->encoding = UTF8_STR; #endif c = buf_get(&dr->buf); } else { ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file."); c = '\0'; } } return c; } static void parse(SaxDrive dr) { char c = skipBOM(dr); int state = START_STATE; Nv parent; while ('\0' != c) { buf_protect(&dr->buf); if ('<' == c) { c = buf_get(&dr->buf); switch (c) { case '?': /* instructions (xml or otherwise) */ c = read_instruction(dr); break; case '!': /* comment or doctype */ buf_protect(&dr->buf); c = buf_get(&dr->buf); if ('\0' == c) { ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated"); goto DONE; } else if ('-' == c) { c = buf_get(&dr->buf); /* skip first - and get next character */ if ('-' != c) { ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected