ox-2.1.1/0000755000004100000410000000000012311544775012202 5ustar www-datawww-dataox-2.1.1/lib/0000755000004100000410000000000012311544775012750 5ustar www-datawww-dataox-2.1.1/lib/ox.rb0000644000004100000410000000623712311544775013733 0ustar www-datawww-data# Copyright (c) 2011, Peter Ohler
# All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # - Neither the name of Peter Ohler nor the names of its contributors may be # used to endorse or promote products derived from this software without # specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # === Description: # # Ox handles XML documents in two ways. It is a generic XML parser and writer as # well as a fast Object / XML marshaller. Ox was written for speed as a # replacement for Nokogiri and for Marshal. # # As an XML parser it is 2 or more times faster than Nokogiri and as a generic # XML writer it is 14 times faster than Nokogiri. Of course different files may # result in slightly different times. # # As an Object serializer Ox is 4 times faster than the standard Ruby # Marshal.dump(). 
Ox is 3 times faster than Marshal.load(). # # === Object Dump Sample: # # require 'ox' # # class Sample # attr_accessor :a, :b, :c # # def initialize(a, b, c) # @a = a # @b = b # @c = c # end # end # # # Create Object # obj = Sample.new(1, "bee", ['x', :y, 7.0]) # # Now dump the Object to an XML String. # xml = Ox.dump(obj) # # Convert the object back into a Sample Object. # obj2 = Ox.parse_obj(xml) # # === Generic XML Writing and Parsing: # # require 'ox' # # doc = Ox::Document.new(:version => '1.0') # # top = Ox::Element.new('top') # top[:name] = 'sample' # doc << top # # mid = Ox::Element.new('middle') # mid[:name] = 'second' # top << mid # # bot = Ox::Element.new('bottom') # bot[:name] = 'third' # mid << bot # # xml = Ox.dump(doc) # puts xml # doc2 = Ox.parse(xml) # puts "Same? #{doc == doc2}" module Ox end require 'ox/version' require 'ox/error' require 'ox/hasattrs' require 'ox/node' require 'ox/comment' require 'ox/instruct' require 'ox/cdata' require 'ox/doctype' require 'ox/element' require 'ox/document' require 'ox/bag' require 'ox/sax' require 'ox/ox' # C extension ox-2.1.1/lib/ox/0000755000004100000410000000000012311544775013376 5ustar www-datawww-dataox-2.1.1/lib/ox/instruct.rb0000644000004100000410000000250012311544775015573 0ustar www-datawww-data module Ox # An Instruct represents a processing instruction of an XML document. It has a target, attributes, and a value or # content. The content will be all characters with the exception of the target. If the content follows a regular # attribute format then the attributes will be set to the parsed values. If it does not follow the attribute formate # then the attributes will be empty. class Instruct < Node include HasAttrs # The content of the processing instruction. attr_accessor :content # Creates a new Instruct with the specified name. 
# @param [String] name name of the Instruct def initialize(name) super @attributes = nil @content = nil end alias target value # Returns true if this Object and other are of the same type and have the # equivalent value and the equivalent elements otherwise false is returned. # @param [Object] other Object compare _self_ to. # @return [Boolean] true if both Objects are equivalent, otherwise false. def eql?(other) return false if (other.nil? or self.class != other.class) return false unless super(other) return false unless self.attributes == other.attributes return false unless self.content == other.content true end alias == eql? end # Instruct end # Ox ox-2.1.1/lib/ox/cdata.rb0000644000004100000410000000036712311544775015005 0ustar www-datawww-data module Ox # CData represents a CDATA element in an XML document. class CData < Node # Creates a CDATA element. # @param value [String] value for the CDATA contents def initialize(value) super end end # CData end # Ox ox-2.1.1/lib/ox/comment.rb0000644000004100000410000000046712311544775015374 0ustar www-datawww-data module Ox # Coments represent XML comments in an XML document. A comment as value # attribute only. class Comment < Node # Creates a new Comment with the specified value. # @param value [String] string value for the comment def initialize(value) super end end # Comment end # Ox ox-2.1.1/lib/ox/sax.rb0000644000004100000410000000526412311544775014525 0ustar www-datawww-datamodule Ox # A SAX style parse handler. The Ox::Sax handler class should be subclasses # and then used with the Ox.sax_parse() method. The Sax methods will then be # called as the file is parsed. This is best suited for very large files or # IO streams.

# @example # # require 'ox' # # class MySax < ::Ox::Sax # def initialize() # @element_name = [] # end # # def start_element(name) # @element_names << name # end # end # # any = MySax.new() # File.open('any.xml', 'r') do |f| # Ox.sax_parse(any, f) # end # # To make the desired methods active while parsing the desired method should # be made public in the subclasses. If the methods remain private they will # not be called during parsing. The 'name' argument in the callback methods # will be a Symbol. The 'str' arguments will be a String. The 'value' # arguments will be Ox::Sax::Value objects. Since both the text() and the # value() methods are called for the same element in the XML document the the # text() method is ignored if the value() method is defined or public. The # same is true for attr() and attr_value(). When all attribtues have been read # the attr_done() callback will be invoked. # # def instruct(target); end # def end_instruct(target); end # def attr(name, str); end # def attr_value(name, value); end # def attrs_done(); end # def doctype(str); end # def comment(str); end # def cdata(str); end # def text(str); end # def value(value); end # def start_element(name); end # def end_element(name); end # # Initializing @line in the initializer will cause that variable to be updated # before each callback with the XML line number. The same is true for the # @column but it will be updated with the column in the XML file that is the # start of the element or node just read. class Sax # Create a new instance of the Sax handler class. def initialize() #@line = nil #@column = nil end # To make the desired methods active while parsing the desired method # should be made public in the subclasses. If the methods remain private # they will not be called during parsing. 
private def instruct(target) end def end_instruct(target) end def attr(name, str) end def attr_value(name, value) end def attrs_done() end def doctype(str) end def comment(str) end def cdata(str) end def text(str) end def value(value) end def start_element(name) end def end_element(name) end def error(message, line, column) end end # Sax end # Ox ox-2.1.1/lib/ox/document.rb0000644000004100000410000000207012311544775015540 0ustar www-datawww-data module Ox # Represents an XML document. It has a fixed set of attributes which form # the XML prolog. A Document includes Elements. class Document < Element # Create a new Document. # @param [Hash] prolog prolog attributes # @option prolog [String] :version version, typically '1.0' or '1.1' # @option prolog [String] :encoding encoding for the document, currently included but ignored # @option prolog [String] :standalone indicates the document is standalone def initialize(prolog={}) super(nil) @attributes = { } @attributes[:version] = prolog[:version] unless prolog[:version].nil? @attributes[:encoding] = prolog[:encoding] unless prolog[:encoding].nil? @attributes[:standalone] = prolog[:standalone] unless prolog[:standalone].nil? end # Returns the first Element in the document. def root() unless !instance_variable_defined?(:@nodes) || @nodes.nil? @nodes.each do |n| return n if n.is_a?(::Ox::Element) end end nil end end # Document end # Ox ox-2.1.1/lib/ox/hasattrs.rb0000644000004100000410000000366312311544775015564 0ustar www-datawww-data module Ox # An Object that includes the HasAttrs module can have attributes which are a Hash of String values and either String # or Symbol keys. # # To access the attributes there are several options. One is to walk the attributes. The easiest for simple regularly # formatted XML is to reference the attributes simply by name. module HasAttrs # Returns all the attributes of the Instruct as a Hash. # @return [Hash] all attributes and attribute values. 
def attributes @attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil? @attributes end # Returns the value of an attribute. # @param [Symbol|String] attr attribute name or key to return the value for def [](attr) return nil unless instance_variable_defined?(:@attributes) and @attributes.is_a?(Hash) @attributes[attr] or (attr.is_a?(String) ? @attributes[attr.to_sym] : @attributes[attr.to_s]) end # Adds or set an attribute of the Instruct. # @param [Symbol|String] attr attribute name or key # @param [Object] value value for the attribute def []=(attr, value) raise "argument to [] must be a Symbol or a String." unless attr.is_a?(Symbol) or attr.is_a?(String) @attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil? @attributes[attr] = value.to_s end # Handles the 'easy' API that allows navigating a simple XML by # referencing attributes by name. # @param [Symbol] id element or attribute name # @return [String|nil] the attribute value # @raise [NoMethodError] if no match is found def method_missing(id, *args, &block) ids = id.to_s if instance_variable_defined?(:@attributes) return @attributes[id] if @attributes.has_key?(id) return @attributes[ids] if @attributes.has_key?(ids) end raise NoMethodError.new("#{ids} not found", name) end end # HasAttrs end # Ox ox-2.1.1/lib/ox/version.rb0000644000004100000410000000010712311544775015406 0ustar www-datawww-data module Ox # Current version of the module. VERSION = '2.1.1' end ox-2.1.1/lib/ox/bag.rb0000644000004100000410000000671512311544775014465 0ustar www-datawww-data module Ox # A generic class that is used only for storing attributes. It is the base # Class for auto-generated classes in the storage system. Instance variables # are added using the instance_variable_set() method. All instance variables # can be accessed using the variable name (without the @ prefix). No setters # are provided as the Class is intended for reading only. 
class Bag # The initializer can take multiple arguments in the form of key values # where the key is the variable name and the value is the variable # value. This is intended for testing purposes only. # @example Ox::Bag.new(:@x => 42, :@y => 57) # @param [Hash] args instance variable symbols and their values def initialize(args={ }) args.each do |k,v| self.instance_variable_set(k, v) end end # Replaces the Object.respond_to?() method. # @param [Symbol] m method symbol # @return [Boolean] true for any method that matches an instance # variable reader, otherwise false. def respond_to?(m) return true if super at_m = ('@' + m.to_s).to_sym instance_variables.include?(at_m) end # Handles requests for variable values. Others cause an Exception to be # raised. # @param [Symbol] m method symbol # @return [Boolean] the value of the specified instance variable. # @raise [ArgumentError] if an argument is given. Zero arguments expected. # @raise [NoMethodError] if the instance variable is not defined. def method_missing(m, *args, &block) raise ArgumentError.new("wrong number of arguments (#{args.size} for 0) to method #{m}") unless args.nil? or args.empty? at_m = ('@' + m.to_s).to_sym raise NoMethodError.new("undefined method #{m}", m) unless instance_variable_defined?(at_m) instance_variable_get(at_m) end # Replaces eql?() with something more reasonable for this Class. # @param [Object] other Object to compare self to # @return [Boolean] true if each variable and value are the same, otherwise false. def eql?(other) return false if (other.nil? or self.class != other.class) ova = other.instance_variables iv = instance_variables return false if ova.size != iv.size iv.each do |vid| return false if instance_variable_get(vid) != other.instance_variable_get(vid) end true end alias == eql? # Define a new class based on the Ox::Bag class. 
This is used internally in # the Ox module and is available to service wrappers that receive XML # requests that include Objects of Classes not defined in the storage # process. # @param [String] classname Class name or symbol that includes Module names. # @return [Object] an instance of the specified Class. # @raise [NameError] if the classname is invalid. def self.define_class(classname) classname = classname.to_s unless classname.is_a?(String) tokens = classname.split('::').map { |n| n.to_sym } raise NameError.new("Invalid classname '#{classname}") if tokens.empty? m = Object tokens[0..-2].each do |sym| if m.const_defined?(sym) m = m.const_get(sym) else c = Module.new m.const_set(sym, c) m = c end end sym = tokens[-1] if m.const_defined?(sym) c = m.const_get(sym) else c = Class.new(Ox::Bag) m.const_set(sym, c) end c end end # Bag end # Ox ox-2.1.1/lib/ox/doctype.rb0000644000004100000410000000046612311544775015400 0ustar www-datawww-data module Ox # Represents a DOCTYPE in an XML document. class DocType < Node # Creates a DOCTYPE elements with the content as a string specified in the # value parameter. # @param value [String] string value for the element def initialize(value) super end end # DocType end # Ox ox-2.1.1/lib/ox/xmlrpc_adapter.rb0000644000004100000410000000150612311544775016732 0ustar www-datawww-data require 'ox' module Ox # This is an alternative parser for the stdlib xmlrpc library. It makes # use of Ox and is based on REXMLStreamParser. 
To use it set is as the # parser for an XMLRPC client: # # require 'xmlrpc/client' # require 'ox/xmlrpc_adapter' # client = XMLRPC::Client.new2('http://some_server/rpc') # client.set_parser(Ox::StreamParser.new) # class StreamParser < XMLRPC::XMLParser::AbstractStreamParser def initialize @parser_class = OxParser end class OxParser < Ox::Sax include XMLRPC::XMLParser::StreamParserMixin alias :text :character alias :end_element :endElement alias :start_element :startElement def parse(str) Ox.sax_parse(self, StringIO.new(str), :symbolize => false, :convert_special => true) end end end end ox-2.1.1/lib/ox/node.rb0000644000004100000410000000125712311544775014655 0ustar www-datawww-data module Ox # The Node is the base class for all other in the Ox module. class Node # String value associated with the Node. attr_accessor :value # Creates a new Node with the specified String value. # @param [String] value string value for the Node def initialize(value) @value = value.to_s end # Returns true if this Object and other are of the same type and have the # equivalent value otherwise false is returned. # @param [Object] other Object to compare _self_ to. def eql?(other) return false if (other.nil? or self.class != other.class) other.value == self.value end alias == eql? end # Node end # Ox ox-2.1.1/lib/ox/error.rb0000644000004100000410000000076712311544775015066 0ustar www-datawww-data module Ox class Error < StandardError end # Error # An Exception that is raised as a result of a parse error while parsing a XML document. class ParseError < Error end # ParseError # An Exception that is raised as a result of an invalid argument. class ArgError < Error end # ArgError # An Exception raised if a path is not valid. 
class InvalidPath < Error
    # Builds the message from the path steps joined with '/'.
    # @param [Array] path steps of the offending path (must respond to join)
    def initialize(path)
      super("#{path.join('/')} is not a valid location.")
    end
  end # InvalidPath

end # Ox
ox-2.1.1/lib/ox/element.rb0000644000004100000410000002100312311544775015350 0ustar www-datawww-data
module Ox

  # An Element represents a element of an XML document. It has a name,
  # attributes, and sub-nodes.
  #
  # To access the child elements or attributes there are several options. One
  # is to walk the nodes and attributes. Another is to use the locate()
  # method. The easiest for simple regularly formatted XML is to reference the
  # sub elements or attributes simply by name. Repeating elements with the
  # same name can be referenced with an element count as well. A few examples
  # should explain the 'easy' API more clearly.
  #
  # *Example*
  #
  #   doc = Ox.parse(%{
  #   <?xml?>
  #   <People>
  #     <Person age="58">
  #       <given>Peter</given>
  #       <surname>Ohler</surname>
  #     </Person>
  #     <Person>
  #       <given>Makie</given>
  #       <surname>Ohler</surname>
  #     </Person>
  #   </People>
  #   })
  #
  #   doc.People.Person.given.text
  #   => "Peter"
  #   doc.People.Person(1).given.text
  #   => "Makie"
  #   doc.People.Person.age
  #   => "58"
  class Element < Node
    include HasAttrs

    # Creates a new Element with the specified name.
    # @param [String] name name of the Element
    def initialize(name)
      # Zero-argument super forwards +name+ to the superclass initializer.
      super
      # Both are created lazily by the accessors below.
      @attributes = nil
      @nodes = nil
    end
    # +name+ is a second name for the inherited +value+ reader.
    alias name value

    # Returns the Element's nodes array. These are the sub-elements of this
    # Element. When no node has been added yet a new empty Array is returned
    # each time; it is not stored on the Element.
    # @return [Array] all child Nodes.
    def nodes
      return [] if !instance_variable_defined?(:@nodes) or @nodes.nil?
      @nodes
    end

    # Appends a Node to the Element's nodes array. Returns the element itself
    # so multiple appends can be chained together.
    # @param [Node] node Node to append to the nodes array
    # @raise [RuntimeError] if node is neither a String nor an Ox::Node
    def <<(node)
      raise "argument to << must be a String or Ox::Node." unless node.is_a?(String) or node.is_a?(Node)
      @nodes = [] if !instance_variable_defined?(:@nodes) or @nodes.nil?
      @nodes << node
      self
    end

    # Returns true if this Object and other are of the same type and have the
    # equivalent value and the equivalent elements otherwise false is returned.
    # @param [Object] other Object compare _self_ to.
# @return [Boolean] true if both Objects are equivalent, otherwise false.
    def eql?(other)
      same_type = !other.nil? && self.class == other.class
      if same_type && super(other) && attributes == other.attributes && nodes == other.nodes
        true
      else
        false
      end
    end
    alias == eql?

    # Looks for text content directly below this Element.
    # @return [String|nil] the first String among the child nodes, or nil
    #   when none of the children is a String.
    def text
      nodes.find { |child| child.is_a?(String) }
    end

    # Collects the Nodes or Strings found at the locations described by the
    # path parameter. A path can lead to nodes in the XML or to attribute
    # values and is always relative to this Element. The syntax resembles
    # XPath but does not follow the XPath rules; it is meant to be simpler
    # and more Ruby like.
    #
    # As in XPath the path delimiter is the slash (/) character. The path is
    # split on the delimiter and each step then names the child of the
    # current Element to traverse.
    #
    # Attributes are specified with an @ prefix.
    #
    # Each element name in the path can be followed by a bracket expression
    # that narrows the paths to traverse. Supported expressions are numbers
    # with a preceding qualifier. Qualifiers are -, +, <, and >. The +
    # qualifier is the default. A - qualifier indicates the index begins at
    # the end of the children just like for Ruby Arrays. The < and >
    # qualifiers indicate that all elements either less than or greater than
    # the index should be matched. Note that, contrary to XPath, the element
    # index starts at 0 as it does for Ruby Arrays.
    #
    # Element names can also be wildcard characters. A * indicates any descendant should be followed. A ? indicates any
    # single Element can match the wildcard. A ^ character followed by the name of a Class will match any node of the
    # specified class. Valid class names are Element, Comment, String (or Text), CData, DocType.
# # Examples are: # * element.locate("Family/Pete/*") returns all children of the Pete Element. # * element.locate("Family/?[1]") returns the first element in the Family Element. # * element.locate("Family/?[<3]") returns the first 3 elements in the Family Element. # * element.locate("Family/?/@age") returns the arg attribute for each child in the Family Element. # * element.locate("Family/*/@type") returns the type attribute value for decendents of the Family. # * element.locate("Family/^Comment") returns any comments that are a child of Family. # # @param [String] path path to the Nodes to locate def locate(path) return [self] if path.nil? found = [] pa = path.split('/') alocate(pa, found) found end # Handles the 'easy' API that allows navigating a simple XML by # referencing elements and attributes by name. # @param [Symbol] id element or attribute name # @return [Element|Node|String|nil] the element, attribute value, or Node identifed by the name # @raise [NoMethodError] if no match is found def method_missing(id, *args, &block) ids = id.to_s i = args[0].to_i # will be 0 if no arg or parsing fails nodes.each do |n| if (n.is_a?(Element) || n.is_a?(Instruct)) && (n.value == id || n.value == ids) return n if 0 == i i -= 1 end end if instance_variable_defined?(:@attributes) return @attributes[id] if @attributes.has_key?(id) return @attributes[ids] if @attributes.has_key?(ids) end raise NoMethodError.new("#{ids} not found", name) end # @param [Array] path array of steps in a path # @param [Array] found matching nodes def alocate(path, found) step = path[0] if step.start_with?('@') # attribute raise InvalidPath.new(path) unless 1 == path.size if instance_variable_defined?(:@attributes) step = step[1..-1] sym_step = step.to_sym @attributes.each do |k,v| found << v if ('?' == step or k == step or k == sym_step) end end else # element name if (i = step.index('[')).nil? 
# just name name = step qual = nil else name = step[0..i-1] raise InvalidPath.new(path) unless step.end_with?(']') i += 1 qual = step[i..i] # step[i] would be better but some rubies (jruby, ree, rbx) take that as a Fixnum. if '0' <= qual and qual <= '9' qual = '+' else i += 1 end index = step[i..-2].to_i end if '?' == name or '*' == name match = nodes elsif '^' == name[0..0] # 1.8.7 thinks name[0] is a fixnum case name[1..-1] when 'Element' match = nodes.select { |e| e.is_a?(Element) } when 'String', 'Text' match = nodes.select { |e| e.is_a?(String) } when 'Comment' match = nodes.select { |e| e.is_a?(Comment) } when 'CData' match = nodes.select { |e| e.is_a?(CData) } when 'DocType' match = nodes.select { |e| e.is_a?(DocType) } else #puts "*** no match on #{name}" match = [] end else match = nodes.select { |e| e.is_a?(Element) and name == e.name } end unless qual.nil? or match.empty? case qual when '+' match = index < match.size ? [match[index]] : [] when '-' match = index <= match.size ? [match[-index]] : [] when '<' match = 0 < index ? match[0..index - 1] : [] when '>' match = index <= match.size ? match[index + 1..-1] : [] else raise InvalidPath.new(path) end end if (1 == path.size) match.each { |n| found << n } elsif '*' == name match.each { |n| n.alocate(path, found) if n.is_a?(Element) } match.each { |n| n.alocate(path[1..-1], found) if n.is_a?(Element) } else match.each { |n| n.alocate(path[1..-1], found) if n.is_a?(Element) } end end end end # Element end # Ox ox-2.1.1/metadata.yml0000644000004100000410000000417612311544775014515 0ustar www-datawww-data--- !ruby/object:Gem::Specification name: ox version: !ruby/object:Gem::Version version: 2.1.1 platform: ruby authors: - Peter Ohler autorequire: bindir: bin cert_chain: [] date: 2014-02-12 00:00:00.000000000 Z dependencies: [] description: "A fast XML parser and object serializer that uses only standard C lib.\n \ \nOptimized XML (Ox), as the name implies was written to provide speed optimized\nXML handling. 
It was designed to be an alternative to Nokogiri and other Ruby\nXML parsers for generic XML parsing and as an alternative to Marshal for Object\nserialization. " email: peter@ohler.com executables: [] extensions: - ext/ox/extconf.rb extra_rdoc_files: - README.md files: - LICENSE - README.md - ext/ox/attr.h - ext/ox/base64.c - ext/ox/base64.h - ext/ox/cache.c - ext/ox/cache.h - ext/ox/cache8.c - ext/ox/cache8.h - ext/ox/cache8_test.c - ext/ox/cache_test.c - ext/ox/dump.c - ext/ox/encode.h - ext/ox/err.c - ext/ox/err.h - ext/ox/extconf.rb - ext/ox/gen_load.c - ext/ox/helper.h - ext/ox/obj_load.c - ext/ox/ox.c - ext/ox/ox.h - ext/ox/parse.c - ext/ox/sax.c - ext/ox/sax.h - ext/ox/sax_as.c - ext/ox/sax_buf.c - ext/ox/sax_buf.h - ext/ox/sax_has.h - ext/ox/sax_hint.c - ext/ox/sax_hint.h - ext/ox/sax_stack.h - ext/ox/special.c - ext/ox/special.h - ext/ox/type.h - lib/ox.rb - lib/ox/bag.rb - lib/ox/cdata.rb - lib/ox/comment.rb - lib/ox/doctype.rb - lib/ox/document.rb - lib/ox/element.rb - lib/ox/error.rb - lib/ox/hasattrs.rb - lib/ox/instruct.rb - lib/ox/node.rb - lib/ox/sax.rb - lib/ox/version.rb - lib/ox/xmlrpc_adapter.rb homepage: http://www.ohler.com/ox licenses: - MIT - GPL-3.0 metadata: {} post_install_message: rdoc_options: - "--main" - README.md require_paths: - lib - ext required_ruby_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' required_rubygems_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' requirements: [] rubyforge_project: ox rubygems_version: 2.2.0 signing_key: specification_version: 4 summary: A fast XML parser and object serializer. test_files: [] has_rdoc: true ox-2.1.1/LICENSE0000644000004100000410000000272012311544775013210 0ustar www-datawww-dataCopyright (c) 2011, Peter Ohler All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of Peter Ohler nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ox-2.1.1/checksums.yaml.gz0000444000004100000410000000041312311544775015466 0ustar www-datawww-dataORe9V0 D"tt@BEKq(n_zx_>WUŖ;f #include #include #include #include #include #include "cache.h" struct _Cache { /* The key is a length byte followed by the key as a string. If the key is longer than 254 characters then the length is 255. The key can be for a premature value and in that case the length byte is greater than the length of the key. 
*/
    char            *key;
    VALUE           value;      /* Qundef until a caller stores something through the slot */
    struct _Cache   *slots[16]; /* one child per 4 bit nibble of a key byte */
};

static void     slot_print(Cache cache, unsigned int depth);

/* Returns a newly allocated copy of s prefixed with one length byte. The
 * length byte is capped at 255 and the terminating '\0' of s is copied too.
 * NOTE(review): the length is stored in a plain char; if char is signed on
 * the target, lengths above 127 read back negative in the comparisons made
 * in ox_cache_get() -- confirm.
 */
static char*
form_key(const char *s) {
    size_t      len = strlen(s);
    char        *d = ALLOC_N(char, len + 2);

    *d = (255 <= len) ? 255 : len;
    memcpy(d + 1, s, len + 1);

    return d;
}

/* Allocates a node with no key, an undefined value, and all 16 slots empty,
 * and stores it in *cache.
 */
void
ox_cache_new(Cache *cache) {
    *cache = ALLOC(struct _Cache);
    (*cache)->key = 0;
    (*cache)->value = Qundef;
    memset((*cache)->slots, 0, sizeof((*cache)->slots));
}

/* Walks the trie two nibbles per key byte, creating nodes as needed, and
 * returns the value stored for key (Qundef when nothing has been stored).
 * *slot is set to the address of that value so the caller can fill it in.
 * When keyp is not NULL, *keyp is set to the cached copy of the key (past the
 * length byte), or to 0 after printing an error if no key was recorded.
 */
VALUE
ox_cache_get(Cache cache, const char *key, VALUE **slot, char **keyp) {
    unsigned char       *k = (unsigned char*)key;
    Cache               *cp;

    for (; '\0' != *k; k++) {
        cp = cache->slots + (unsigned int)(*k >> 4); /* upper 4 bits */
        if (0 == *cp) {
            ox_cache_new(cp);
        }
        cache = *cp;
        cp = cache->slots + (unsigned int)(*k & 0x0F); /* lower 4 bits */
        if (0 == *cp) { /* nothing on this tree so set key and value as a premature key/value pair */
            ox_cache_new(cp);
            cache = *cp;
            cache->key = form_key(key);
            break;
        } else {
            /* number of key bytes consumed so far, this one included */
            int depth = (int)(k - (unsigned char*)key + 1);

            cache = *cp;

            if ('\0' == *(k + 1)) { /* exact match */
                if (0 == cache->key) { /* nothing in this spot so take it */
                    cache->key = form_key(key);
                    break;
                /* NOTE(review): this test uses 255 < depth while the branch
                 * further down uses 255 <= depth -- confirm which is intended. */
                } else if ((depth == *cache->key || 255 < depth) && 0 == strcmp(key, cache->key + 1)) { /* match */
                    break;
                } else { /* have to move the current premature key/value deeper */
                    unsigned char       *ck = (unsigned char*)(cache->key + depth + 1);
                    Cache               orig = *cp;

                    cp = (*cp)->slots + (*ck >> 4);
                    ox_cache_new(cp);
                    cp = (*cp)->slots + (*ck & 0x0F);
                    ox_cache_new(cp);
                    (*cp)->key = cache->key;
                    (*cp)->value = cache->value;
                    orig->key = form_key(key);
                    orig->value = Qundef;
                }
            } else { /* not exact match but on the path */
                if (0 != cache->key) { /* there is a key/value here already */
                    /* NOTE(review): strncmp() starts at cache->key, i.e. at the
                     * length byte, while every other comparison uses
                     * cache->key + 1 -- looks inconsistent, confirm. */
                    if (depth == *cache->key || (255 <= depth && 0 == strncmp(cache->key, key, depth) && '\0' == cache->key[depth])) { /* key belongs here */
                        continue;
                    } else {
                        /* push the resident key one byte deeper and free up this node */
                        unsigned char   *ck = (unsigned char*)(cache->key + depth + 1);
                        Cache           orig = *cp;

                        cp = (*cp)->slots + (*ck >> 4);
                        ox_cache_new(cp);
                        cp = (*cp)->slots + (*ck & 0x0F);
                        ox_cache_new(cp);
                        (*cp)->key = cache->key;
                        (*cp)->value = cache->value;
                        orig->key = 0;
                        orig->value = Qundef;
                    }
                }
            }
        }
    }
    *slot = &cache->value;
    if (0 != keyp) {
        if (0 == cache->key) {
            printf("*** Error: failed to set the key for %s\n", key);
            *keyp = 0;
        } else {
            *keyp = cache->key + 1; /* skip the length byte */
        }
    }
    return cache->value;
}

/* Prints the whole trie to stdout, starting with no indentation. */
void
ox_cache_print(Cache cache) {
    /*printf("-------------------------------------------\n");*/
    slot_print(cache, 0);
}

/* Recursively prints the occupied slots of c, indenting by depth spaces
 * (clamped to the size of the local indent buffer).
 */
static void
slot_print(Cache c, unsigned int depth) {
    char                indent[256];
    Cache               *cp;
    unsigned int        i;

    if (sizeof(indent) - 1 < depth) {
        depth = ((int)sizeof(indent) - 1);
    }
    memset(indent, ' ', depth);
    indent[depth] = '\0';
    for (i = 0, cp = c->slots; i < 16; i++, cp++) {
        if (0 == *cp) {
            /*printf("%s%02u:\n", indent, i);*/
        } else {
            if (0 == (*cp)->key && Qundef == (*cp)->value) {
                printf("%s%02u:\n", indent, i);
            } else {
                const char      *vs;
                const char      *clas;

                if (Qundef == (*cp)->value) {
                    vs = "undefined";
                    clas = "";
                } else {
                    VALUE       rs = rb_funcall2((*cp)->value, rb_intern("to_s"), 0, 0);

                    vs = StringValuePtr(rs);
                    clas = rb_class2name(rb_obj_class((*cp)->value));
                }
                /* NOTE(review): key is printed from its length byte onward. */
                printf("%s%02u: %s = %s (%s)\n", indent, i, (*cp)->key, vs, clas);
            }
            slot_print(*cp, depth + 2);
        }
    }
}
ox-2.1.1/ext/ox/sax.h0000644000004100000410000000706212311544775014401 0ustar www-datawww-data
/* sax.h
 * Copyright (c) 2011, Peter Ohler
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
* * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_SAX_H__ #define __OX_SAX_H__ #include "sax_buf.h" #include "sax_has.h" #include "sax_stack.h" #include "sax_hint.h" typedef struct _SaxOptions { int symbolize; int convert_special; int smart; } *SaxOptions; typedef struct _SaxDrive { struct _Buf buf; struct _NStack stack; /* element name stack */ VALUE handler; VALUE value_obj; struct _SaxOptions options; int err; struct _Has has; Hints hints; #if HAS_ENCODING_SUPPORT rb_encoding *encoding; #elif HAS_PRIVATE_ENCODING VALUE encoding; #else const char *encoding; #endif } *SaxDrive; extern void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options); extern void ox_sax_drive_cleanup(SaxDrive dr); extern void ox_sax_drive_error(SaxDrive dr, const char *msg); extern int ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col); extern VALUE ox_sax_value_class; inline static VALUE str2sym(SaxDrive dr, const char *str, char **strp) { VALUE *slot; VALUE sym; if (dr->options.symbolize) { if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, 
strp))) { #if HAS_ENCODING_SUPPORT if (0 != dr->encoding) { VALUE rstr = rb_str_new2(str); rb_enc_associate(rstr, dr->encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = ID2SYM(rb_intern(str)); } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding) { VALUE rstr = rb_str_new2(str); rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = ID2SYM(rb_intern(str)); } #else sym = ID2SYM(rb_intern(str)); #endif *slot = sym; } } else { sym = rb_str_new2(str); #if HAS_ENCODING_SUPPORT if (0 != dr->encoding) { rb_enc_associate(sym, dr->encoding); } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding) { rb_funcall(sym, ox_force_encoding_id, 1, dr->encoding); } #endif if (0 != strp) { *strp = StringValuePtr(sym); } } return sym; } #endif /* __OX_SAX_H__ */ ox-2.1.1/ext/ox/obj_load.c0000644000004100000410000005627212311544775015361 0ustar www-datawww-data/* obj_load.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "ruby.h" #include "base64.h" #include "ox.h" static void instruct(PInfo pi, const char *target, Attr attrs, const char *content); static void add_text(PInfo pi, char *text, int closed); static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren); static void end_element(PInfo pi, const char *ename); static VALUE parse_time(const char *text, VALUE clas); static VALUE parse_xsd_time(const char *text, VALUE clas); static VALUE parse_double_time(const char *text, VALUE clas); static VALUE parse_regexp(const char *text); static VALUE get_var_sym_from_attrs(Attr a, void *encoding); static VALUE get_obj_from_attrs(Attr a, PInfo pi, VALUE base_class); static VALUE get_class_from_attrs(Attr a, PInfo pi, VALUE base_class); static VALUE classname2class(const char *name, PInfo pi, VALUE base_class); static unsigned long get_id_from_attrs(PInfo pi, Attr a); static CircArray circ_array_new(void); static void circ_array_free(CircArray ca); static void circ_array_set(CircArray ca, VALUE obj, unsigned long id); static VALUE circ_array_get(CircArray ca, unsigned long id); static void debug_stack(PInfo pi, const char *comment); static void fill_indent(PInfo pi, char *buf, size_t size); struct _ParseCallbacks _ox_obj_callbacks = { instruct, /* instruct, */ 0, /* add_doctype, */ 0, /* add_comment, */ 0, /* add_cdata, */ add_text, add_element, end_element, }; ParseCallbacks 
ox_obj_callbacks = &_ox_obj_callbacks; extern ParseCallbacks ox_gen_callbacks; inline static VALUE str2sym(const char *str, void *encoding) { VALUE sym; #ifdef HAVE_RUBY_ENCODING_H if (0 != encoding) { VALUE rstr = rb_str_new2(str); rb_enc_associate(rstr, (rb_encoding*)encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = ID2SYM(rb_intern(str)); } #else sym = ID2SYM(rb_intern(str)); #endif return sym; } inline static ID name2var(const char *name, void *encoding) { VALUE *slot; ID var_id; if ('0' <= *name && *name <= '9') { var_id = INT2NUM(atoi(name)); } else if (Qundef == (var_id = ox_cache_get(ox_attr_cache, name, &slot, 0))) { #ifdef HAVE_RUBY_ENCODING_H if (0 != encoding) { VALUE rstr = rb_str_new2(name); VALUE sym; rb_enc_associate(rstr, (rb_encoding*)encoding); sym = rb_funcall(rstr, ox_to_sym_id, 0); var_id = SYM2ID(sym); } else { var_id = rb_intern(name); } #else var_id = rb_intern(name); #endif *slot = var_id; } return var_id; } inline static VALUE resolve_classname(VALUE mod, const char *class_name, Effort effort, VALUE base_class) { VALUE clas; ID ci = rb_intern(class_name); switch (effort) { case TolerantEffort: if (rb_const_defined_at(mod, ci)) { clas = rb_const_get_at(mod, ci); } else { clas = Qundef; } break; case AutoEffort: if (rb_const_defined_at(mod, ci)) { clas = rb_const_get_at(mod, ci); } else { clas = rb_define_class_under(mod, class_name, base_class); } break; case StrictEffort: default: /* raise an error if name is not defined */ clas = rb_const_get_at(mod, ci); break; } return clas; } inline static VALUE classname2obj(const char *name, PInfo pi, VALUE base_class) { VALUE clas = classname2class(name, pi, base_class); if (Qundef == clas) { return Qnil; } else { return rb_obj_alloc(clas); } } #if HAS_RSTRUCT inline static VALUE structname2obj(const char *name) { VALUE ost; const char *s = name; for (; 1; s++) { if ('\0' == *s) { s = name; break; } else if (':' == *s) { s += 2; break; } } ost = rb_const_get(ox_struct_class, 
rb_intern(s)); /* use encoding as the indicator for Ruby 1.8.7 or 1.9.x */ #if HAS_ENCODING_SUPPORT return rb_struct_alloc_noinit(ost); #elif HAS_PRIVATE_ENCODING return rb_struct_alloc_noinit(ost); #else return rb_struct_new(ost); #endif } #endif inline static VALUE parse_ulong(const char *s, PInfo pi) { unsigned long n = 0; for (; '\0' != *s; s++) { if ('0' <= *s && *s <= '9') { n = n * 10 + (*s - '0'); } else { set_error(&pi->err, "Invalid number for a julian day", pi->str, pi->s); return Qundef; } } return ULONG2NUM(n); } /* 2010-07-09T10:47:45.895826162+09:00 */ inline static VALUE parse_time(const char *text, VALUE clas) { VALUE t; if (Qnil == (t = parse_double_time(text, clas)) && Qnil == (t = parse_xsd_time(text, clas))) { VALUE args[1]; /*printf("**** time parse\n"); */ *args = rb_str_new2(text); t = rb_funcall2(ox_time_class, ox_parse_id, 1, args); } return t; } static VALUE classname2class(const char *name, PInfo pi, VALUE base_class) { VALUE *slot; VALUE clas; if (Qundef == (clas = ox_cache_get(ox_class_cache, name, &slot, 0))) { char class_name[1024]; char *s; const char *n = name; clas = rb_cObject; for (s = class_name; '\0' != *n; n++) { if (':' == *n) { *s = '\0'; n++; if (':' != *n) { set_error(&pi->err, "Invalid classname, expected another ':'", pi->str, pi->s); return Qundef; } if (Qundef == (clas = resolve_classname(clas, class_name, pi->options->effort, base_class))) { return Qundef; } s = class_name; } else { *s++ = *n; } } *s = '\0'; if (Qundef != (clas = resolve_classname(clas, class_name, pi->options->effort, base_class))) { *slot = clas; } } return clas; } static VALUE get_var_sym_from_attrs(Attr a, void *encoding) { for (; 0 != a->name; a++) { if ('a' == *a->name && '\0' == *(a->name + 1)) { return name2var(a->value, encoding); } } return Qundef; } static VALUE get_obj_from_attrs(Attr a, PInfo pi, VALUE base_class) { for (; 0 != a->name; a++) { if ('c' == *a->name && '\0' == *(a->name + 1)) { return classname2obj(a->value, pi, 
base_class); } } return Qundef; } #if HAS_RSTRUCT static VALUE get_struct_from_attrs(Attr a) { for (; 0 != a->name; a++) { if ('c' == *a->name && '\0' == *(a->name + 1)) { return structname2obj(a->value); } } return Qundef; } #endif static VALUE get_class_from_attrs(Attr a, PInfo pi, VALUE base_class) { for (; 0 != a->name; a++) { if ('c' == *a->name && '\0' == *(a->name + 1)) { return classname2class(a->value, pi, base_class); } } return Qundef; } static unsigned long get_id_from_attrs(PInfo pi, Attr a) { for (; 0 != a->name; a++) { if ('i' == *a->name && '\0' == *(a->name + 1)) { unsigned long id = 0; const char *text = a->value; char c; for (; '\0' != *text; text++) { c = *text; if ('0' <= c && c <= '9') { id = id * 10 + (c - '0'); } else { set_error(&pi->err, "bad number format", pi->str, pi->s); return 0; } } return id; } } return 0; } static CircArray circ_array_new() { CircArray ca; ca = ALLOC(struct _CircArray); ca->objs = ca->obj_array; ca->size = sizeof(ca->obj_array) / sizeof(VALUE); ca->cnt = 0; return ca; } static void circ_array_free(CircArray ca) { if (ca->objs != ca->obj_array) { xfree(ca->objs); } xfree(ca); } static void circ_array_set(CircArray ca, VALUE obj, unsigned long id) { if (0 < id) { unsigned long i; if (ca->size < id) { unsigned long cnt = id + 512; if (ca->objs == ca->obj_array) { ca->objs = ALLOC_N(VALUE, cnt); memcpy(ca->objs, ca->obj_array, sizeof(VALUE) * ca->cnt); } else { REALLOC_N(ca->objs, VALUE, cnt); } ca->size = cnt; } id--; for (i = ca->cnt; i < id; i++) { ca->objs[i] = Qundef; } ca->objs[id] = obj; if (ca->cnt <= id) { ca->cnt = id + 1; } } } static VALUE circ_array_get(CircArray ca, unsigned long id) { VALUE obj = Qundef; if (id <= ca->cnt) { obj = ca->objs[id - 1]; } return obj; } static VALUE parse_regexp(const char *text) { const char *te; int options = 0; te = text + strlen(text) - 1; #if HAS_ONIG for (; text < te && '/' != *te; te--) { switch (*te) { case 'i': options |= ONIG_OPTION_IGNORECASE; break; case 'm': 
options |= ONIG_OPTION_MULTILINE; break; case 'x': options |= ONIG_OPTION_EXTEND; break; default: break; } } #endif return rb_reg_new(text + 1, te - text - 1, options); } static void instruct(PInfo pi, const char *target, Attr attrs, const char *content) { if (0 == strcmp("xml", target)) { #if HAS_ENCODING_SUPPORT for (; 0 != attrs->name; attrs++) { if (0 == strcmp("encoding", attrs->name)) { pi->options->rb_enc = rb_enc_find(attrs->value); } } #elif HAS_PRIVATE_ENCODING for (; 0 != attrs->name; attrs++) { if (0 == strcmp("encoding", attrs->name)) { pi->options->rb_enc = rb_str_new2(attrs->value); } } #endif } } static void add_text(PInfo pi, char *text, int closed) { Helper h = helper_stack_peek(&pi->helpers); if (!closed) { set_error(&pi->err, "Text not closed", pi->str, pi->s); return; } if (0 == h) { set_error(&pi->err, "Unexpected text", pi->str, pi->s); return; } if (DEBUG <= pi->options->trace) { char indent[128]; fill_indent(pi, indent, sizeof(indent)); printf("%s '%s' to type %c\n", indent, text, h->type); } switch (h->type) { case NoCode: case StringCode: h->obj = rb_str_new2(text); #if HAS_ENCODING_SUPPORT if (0 != pi->options->rb_enc) { rb_enc_associate(h->obj, pi->options->rb_enc); } #elif HAS_PRIVATE_ENCODING if (Qnil != pi->options->rb_enc) { rb_funcall(h->obj, ox_force_encoding_id, 1, pi->options->rb_enc); } #endif if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, (unsigned long)pi->id); } break; case FixnumCode: { long n = 0; char c; int neg = 0; if ('-' == *text) { neg = 1; text++; } for (; '\0' != *text; text++) { c = *text; if ('0' <= c && c <= '9') { n = n * 10 + (c - '0'); } else { set_error(&pi->err, "bad number format", pi->str, pi->s); return; } } if (neg) { n = -n; } h->obj = LONG2NUM(n); break; } case FloatCode: h->obj = rb_float_new(strtod(text, 0)); break; case SymbolCode: { VALUE sym; VALUE *slot; if (Qundef == (sym = ox_cache_get(ox_symbol_cache, text, &slot, 0))) { sym = str2sym(text, (void*)pi->options->rb_enc); 
*slot = sym; } h->obj = sym; break; } case DateCode: { VALUE args[1]; if (Qundef == (*args = parse_ulong(text, pi))) { return; } h->obj = rb_funcall2(ox_date_class, ox_jd_id, 1, args); break; } case TimeCode: h->obj = parse_time(text, ox_time_class); break; case String64Code: { unsigned long str_size = b64_orig_size(text); VALUE v; char *str = ALLOCA_N(char, str_size + 1); from_base64(text, (uchar*)str); v = rb_str_new(str, str_size); #if HAS_ENCODING_SUPPORT if (0 != pi->options->rb_enc) { rb_enc_associate(v, pi->options->rb_enc); } #elif HAS_PRIVATE_ENCODING if (0 != pi->options->rb_enc) { rb_funcall(v, ox_force_encoding_id, 1, pi->options->rb_enc); } #endif if (0 != pi->circ_array) { circ_array_set(pi->circ_array, v, (unsigned long)h->obj); } h->obj = v; break; } case Symbol64Code: { VALUE sym; VALUE *slot; unsigned long str_size = b64_orig_size(text); char *str = ALLOCA_N(char, str_size + 1); from_base64(text, (uchar*)str); if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, 0))) { sym = str2sym(str, (void*)pi->options->rb_enc); *slot = sym; } h->obj = sym; break; } case RegexpCode: if ('/' == *text) { h->obj = parse_regexp(text); } else { unsigned long str_size = b64_orig_size(text); char *str = ALLOCA_N(char, str_size + 1); from_base64(text, (uchar*)str); h->obj = parse_regexp(str); } break; case BignumCode: h->obj = rb_cstr_to_inum(text, 10, 1); break; case BigDecimalCode: #if HAS_BIGDECIMAL h->obj = rb_funcall(ox_bigdecimal_class, ox_new_id, 1, rb_str_new2(text)); #else h->obj = Qnil; #endif break; default: h->obj = Qnil; break; } } static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) { Attr a; Helper h; unsigned long id; if (TRACE <= pi->options->trace) { char buf[1024]; char indent[128]; char *s = buf; char *end = buf + sizeof(buf) - 2; s += snprintf(s, end - s, " <%s%s", (hasChildren) ? 
"" : "/", ename); for (a = attrs; 0 != a->name; a++) { s += snprintf(s, end - s, " %s=%s", a->name, a->value); } *s++ = '>'; *s++ = '\0'; if (DEBUG <= pi->options->trace) { printf("===== add element stack(%d) =====\n", helper_stack_depth(&pi->helpers)); debug_stack(pi, buf); } else { fill_indent(pi, indent, sizeof(indent)); printf("%s%s\n", indent, buf); } } if (helper_stack_empty(&pi->helpers)) { /* top level object */ if (0 != (id = get_id_from_attrs(pi, attrs))) { pi->circ_array = circ_array_new(); } } if ('\0' != ename[1]) { set_error(&pi->err, "Invalid element name", pi->str, pi->s); return; } h = helper_stack_push(&pi->helpers, get_var_sym_from_attrs(attrs, (void*)pi->options->rb_enc), Qundef, *ename); switch (h->type) { case NilClassCode: h->obj = Qnil; break; case TrueClassCode: h->obj = Qtrue; break; case FalseClassCode: h->obj = Qfalse; break; case StringCode: /* h->obj will be replaced by add_text if it is called */ h->obj = ox_empty_string; if (0 != pi->circ_array) { pi->id = get_id_from_attrs(pi, attrs); circ_array_set(pi->circ_array, h->obj, pi->id); } break; case FixnumCode: case FloatCode: case SymbolCode: case Symbol64Code: case RegexpCode: case BignumCode: case BigDecimalCode: case ComplexCode: case DateCode: case TimeCode: case RationalCode: /* sub elements read next */ /* value will be read in the following add_text */ h->obj = Qundef; break; case String64Code: h->obj = Qundef; if (0 != pi->circ_array) { pi->id = get_id_from_attrs(pi, attrs); } break; case ArrayCode: h->obj = rb_ary_new(); if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } break; case HashCode: h->obj = rb_hash_new(); if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } break; case RangeCode: h->obj = rb_range_new(ox_zero_fixnum, ox_zero_fixnum, Qfalse); break; case RawCode: if (hasChildren) { h->obj = ox_parse(pi->s, ox_gen_callbacks, &pi->s, pi->options, &pi->err); if (0 != 
pi->circ_array) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } } else { h->obj = Qnil; } break; case ExceptionCode: if (Qundef == (h->obj = get_obj_from_attrs(attrs, pi, rb_eException))) { return; } if (0 != pi->circ_array && Qnil != h->obj) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } break; case ObjectCode: if (Qundef == (h->obj = get_obj_from_attrs(attrs, pi, ox_bag_clas))) { return; } if (0 != pi->circ_array && Qnil != h->obj) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } break; case StructCode: #if HAS_RSTRUCT h->obj = get_struct_from_attrs(attrs); if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs)); } #else set_error(&pi->err, "Ruby structs not supported with this verion of Ruby", pi->str, pi->s); return; #endif break; case ClassCode: if (Qundef == (h->obj = get_class_from_attrs(attrs, pi, ox_bag_clas))) { return; } break; case RefCode: h->obj = Qundef; if (0 != pi->circ_array) { h->obj = circ_array_get(pi->circ_array, get_id_from_attrs(pi, attrs)); } if (Qundef == h->obj) { set_error(&pi->err, "Invalid circular reference", pi->str, pi->s); return; } break; default: set_error(&pi->err, "Invalid element name", pi->str, pi->s); return; break; } if (DEBUG <= pi->options->trace) { debug_stack(pi, " -----------"); } } static void end_element(PInfo pi, const char *ename) { if (TRACE <= pi->options->trace) { char indent[128]; if (DEBUG <= pi->options->trace) { char buf[1024]; printf("===== end element stack(%d) =====\n", helper_stack_depth(&pi->helpers)); snprintf(buf, sizeof(buf) - 1, "", ename); debug_stack(pi, buf); } else { fill_indent(pi, indent, sizeof(indent)); printf("%s\n", indent, ename); } } if (!helper_stack_empty(&pi->helpers)) { Helper h = helper_stack_pop(&pi->helpers); Helper ph = helper_stack_peek(&pi->helpers); if (ox_empty_string == h->obj) { /* special catch for empty strings */ h->obj = rb_str_new2(""); } pi->obj = 
h->obj; if (0 != ph) { switch (ph->type) { case ArrayCode: rb_ary_push(ph->obj, h->obj); break; case ExceptionCode: case ObjectCode: if (Qnil != ph->obj) { rb_ivar_set(ph->obj, h->var, h->obj); } break; case StructCode: #if HAS_RSTRUCT rb_struct_aset(ph->obj, h->var, h->obj); #else set_error(&pi->err, "Ruby structs not supported with this verion of Ruby", pi->str, pi->s); return; #endif break; case HashCode: // put back h helper_stack_push(&pi->helpers, h->var, h->obj, KeyCode); break; case RangeCode: #if HAS_RSTRUCT if (ox_beg_id == h->var) { RSTRUCT_PTR(ph->obj)[0] = h->obj; } else if (ox_end_id == h->var) { RSTRUCT_PTR(ph->obj)[1] = h->obj; } else if (ox_excl_id == h->var) { RSTRUCT_PTR(ph->obj)[2] = h->obj; } else { set_error(&pi->err, "Invalid range attribute", pi->str, pi->s); return; } #else set_error(&pi->err, "Ruby structs not supported with this verion of Ruby", pi->str, pi->s); return; #endif break; case KeyCode: { Helper gh; helper_stack_pop(&pi->helpers); gh = helper_stack_peek(&pi->helpers); rb_hash_aset(gh->obj, ph->obj, h->obj); } break; case ComplexCode: #ifdef T_COMPLEX if (Qundef == ph->obj) { ph->obj = h->obj; } else { ph->obj = rb_complex_new(ph->obj, h->obj); } #else set_error(&pi->err, "Complex Objects not implemented in Ruby 1.8.7", pi->str, pi->s); return; #endif break; case RationalCode: #ifdef T_RATIONAL if (Qundef == ph->obj) { ph->obj = h->obj; } else { #ifdef RUBINIUS_RUBY ph->obj = rb_Rational(ph->obj, h->obj); #else ph->obj = rb_rational_new(ph->obj, h->obj); #endif } #else set_error(&pi->err, "Rational Objects not implemented in Ruby 1.8.7", pi->str, pi->s); return; #endif break; default: set_error(&pi->err, "Corrupt parse stack, container is wrong type", pi->str, pi->s); return; break; } } } if (0 != pi->circ_array && helper_stack_empty(&pi->helpers)) { circ_array_free(pi->circ_array); pi->circ_array = 0; } if (DEBUG <= pi->options->trace) { debug_stack(pi, " ----------"); } } static VALUE parse_double_time(const char *text, VALUE 
clas) { long v = 0; long v2 = 0; const char *dot = 0; char c; for (; '.' != *text; text++) { c = *text; if (c < '0' || '9' < c) { return Qnil; } v = 10 * v + (long)(c - '0'); } dot = text++; for (; '\0' != *text && text - dot <= 6; text++) { c = *text; if (c < '0' || '9' < c) { return Qnil; } v2 = 10 * v2 + (long)(c - '0'); } for (; text - dot <= 9; text++) { v2 *= 10; } #if HAS_NANO_TIME return rb_time_nano_new(v, v2); #else return rb_time_new(v, v2 / 1000); #endif } typedef struct _Tp { int cnt; char end; char alt; } *Tp; static VALUE parse_xsd_time(const char *text, VALUE clas) { long cargs[10]; long *cp = cargs; long v; int i; char c; struct _Tp tpa[10] = { { 4, '-', '-' }, { 2, '-', '-' }, { 2, 'T', 'T' }, { 2, ':', ':' }, { 2, ':', ':' }, { 2, '.', '.' }, { 9, '+', '-' }, { 2, ':', ':' }, { 2, '\0', '\0' }, { 0, '\0', '\0' } }; Tp tp = tpa; struct tm tm; for (; 0 != tp->cnt; tp++) { for (i = tp->cnt, v = 0; 0 < i ; text++, i--) { c = *text; if (c < '0' || '9' < c) { if (tp->end == c || tp->alt == c) { break; } return Qnil; } v = 10 * v + (long)(c - '0'); } c = *text++; if (tp->end != c && tp->alt != c) { return Qnil; } *cp++ = v; } tm.tm_year = (int)cargs[0] - 1900; tm.tm_mon = (int)cargs[1] - 1; tm.tm_mday = (int)cargs[2]; tm.tm_hour = (int)cargs[3]; tm.tm_min = (int)cargs[4]; tm.tm_sec = (int)cargs[5]; #if HAS_NANO_TIME return rb_time_nano_new(mktime(&tm), cargs[6]); #else return rb_time_new(mktime(&tm), cargs[6] / 1000); #endif } /* debug functions */ static void fill_indent(PInfo pi, char *buf, size_t size) { size_t cnt; if (0 < (cnt = helper_stack_depth(&pi->helpers))) { cnt *= 2; if (size < cnt + 1) { cnt = size - 1; } memset(buf, ' ', cnt); buf += cnt; } *buf = '\0'; } static void debug_stack(PInfo pi, const char *comment) { char indent[128]; Helper h; fill_indent(pi, indent, sizeof(indent)); printf("%s%s\n", indent, comment); if (!helper_stack_empty(&pi->helpers)) { for (h = pi->helpers.head; h < pi->helpers.tail; h++) { const char *clas = "---"; 
const char *key = "---"; if (Qundef != h->obj) { VALUE c = rb_obj_class(h->obj); clas = rb_class2name(c); } if (Qundef != h->var) { if (HashCode == h->type) { VALUE v; v = rb_funcall2(h->var, rb_intern("to_s"), 0, 0); key = StringValuePtr(v); } else if (ObjectCode == (h - 1)->type || ExceptionCode == (h - 1)->type || RangeCode == (h - 1)->type || StructCode == (h - 1)->type) { key = rb_id2name(h->var); } else { printf("%s*** corrupt stack ***\n", indent); } } printf("%s [%c] %s : %s\n", indent, h->type, clas, key); } } } ox-2.1.1/ext/ox/sax_hint.c0000644000004100000410000001474412311544775015423 0ustar www-datawww-data/* hint.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "sax_hint.h" static const char *audio_video_0[] = { "audio", "video", 0 }; static const char *colgroup_0[] = { "colgroup", 0 }; static const char *details_0[] = { "details", 0 }; static const char *dl_0[] = { "dl", 0 }; static const char *dt_th_0[] = { "dt", "th", 0 }; static const char *fieldset_0[] = { "fieldset", 0 }; static const char *figure_0[] = { "figure", 0 }; static const char *frameset_0[] = { "frameset", 0 }; static const char *head_0[] = { "head", 0 }; static const char *html_0[] = { "html", 0 }; static const char *map_0[] = { "map", 0 }; static const char *ol_ul_menu_0[] = { "ol", "ul", "menu", 0 }; static const char *optgroup_select_datalist_0[] = { "optgroup", "select", "datalist", 0 }; static const char *ruby_0[] = { "ruby", 0 }; static const char *table_0[] = { "table", 0 }; static const char *tr_0[] = { "tr", 0 }; static struct _Hint html_hint_array[] = { { "a", 0, 0, 0 }, { "abbr", 0, 0, 0 }, { "acronym", 0, 0, 0 }, { "address", 0, 0, 0 }, { "applet", 0, 0, 0 }, { "area", 1, 0, map_0 }, { "article", 0, 0, 0 }, { "aside", 0, 0, 0 }, { "audio", 0, 0, 0 }, { "b", 0, 0, 0 }, { "base", 1, 0, head_0 }, { "basefont", 1, 0, head_0 }, { "bdi", 0, 0, 0 }, { "bdo", 0, 1, 0 }, { "big", 0, 0, 0 }, { "blockquote", 0, 0, 0 }, { "body", 0, 0, html_0 }, { "br", 1, 0, 0 }, { "button", 0, 0, 0 }, { "canvas", 0, 0, 0 }, { "caption", 0, 0, table_0 }, { "center", 0, 0, 0 }, { "cite", 0, 0, 0 }, { "code", 0, 0, 0 }, { 
"col", 1, 0, colgroup_0 }, { "colgroup", 0, 0, 0 }, { "command", 1, 0, 0 }, { "datalist", 0, 0, 0 }, { "dd", 0, 0, dl_0 }, { "del", 0, 0, 0 }, { "details", 0, 0, 0 }, { "dfn", 0, 0, 0 }, { "dialog", 0, 0, dt_th_0 }, { "dir", 0, 0, 0 }, { "div", 0, 1, 0 }, { "dl", 0, 0, 0 }, { "dt", 0, 1, dl_0 }, { "em", 0, 0, 0 }, { "embed", 1, 0, 0 }, { "fieldset", 0, 0, 0 }, { "figcaption", 0, 0, figure_0 }, { "figure", 0, 0, 0 }, { "font", 0, 1, 0 }, { "footer", 0, 0, 0 }, { "form", 0, 0, 0 }, { "frame", 1, 0, frameset_0 }, { "frameset", 0, 0, 0 }, { "h1", 0, 0, 0 }, { "h2", 0, 0, 0 }, { "h3", 0, 0, 0 }, { "h4", 0, 0, 0 }, { "h5", 0, 0, 0 }, { "h6", 0, 0, 0 }, { "head", 0, 0, html_0 }, { "header", 0, 0, 0 }, { "hgroup", 0, 0, 0 }, { "hr", 1, 0, 0 }, { "html", 0, 0, 0 }, { "i", 0, 0, 0 }, { "iframe", 1, 0, 0 }, { "img", 1, 0, 0 }, { "input", 1, 0, 0 }, // somewhere under a form_0 { "ins", 0, 0, 0 }, { "kbd", 0, 0, 0 }, { "keygen", 1, 0, 0 }, { "label", 0, 0, 0 }, // somewhere under a form_0 { "legend", 0, 0, fieldset_0 }, { "li", 0, 0, ol_ul_menu_0 }, { "link", 1, 0, head_0 }, { "map", 0, 0, 0 }, { "mark", 0, 0, 0 }, { "menu", 0, 0, 0 }, { "meta", 1, 0, head_0 }, { "meter", 0, 0, 0 }, { "nav", 0, 0, 0 }, { "noframes", 0, 0, 0 }, { "noscript", 0, 0, 0 }, { "object", 0, 0, 0 }, { "ol", 0, 1, 0 }, { "optgroup", 0, 0, 0 }, { "option", 0, 0, optgroup_select_datalist_0 }, { "output", 0, 0, 0 }, { "p", 0, 0, 0 }, { "param", 1, 0, 0 }, { "pre", 0, 0, 0 }, { "progress", 0, 0, 0 }, { "q", 0, 0, 0 }, { "rp", 0, 0, ruby_0 }, { "rt", 0, 0, ruby_0 }, { "ruby", 0, 0, 0 }, { "s", 0, 0, 0 }, { "samp", 0, 0, 0 }, { "script", 0, 0, 0 }, { "section", 0, 1, 0 }, { "select", 0, 0, 0 }, { "small", 0, 0, 0 }, { "source", 0, 0, audio_video_0 }, { "span", 0, 1, 0 }, { "strike", 0, 0, 0 }, { "strong", 0, 0, 0 }, { "style", 0, 0, 0 }, { "sub", 0, 0, 0 }, { "summary", 0, 0, details_0 }, { "sup", 0, 0, 0 }, { "table", 0, 0, 0 }, { "tbody", 0, 0, table_0 }, { "td", 0, 0, tr_0 }, { "textarea", 0, 0, 0 }, { 
"tfoot", 0, 0, table_0 }, { "th", 0, 0, tr_0 }, { "thead", 0, 0, table_0 }, { "time", 0, 0, 0 }, { "title", 0, 0, head_0 }, { "tr", 0, 0, table_0 }, { "track", 1, 0, audio_video_0 }, { "tt", 0, 0, 0 }, { "u", 0, 0, 0 }, { "ul", 0, 0, 0 }, { "var", 0, 0, 0 }, { "video", 0, 0, 0 }, { "wbr", 1, 0, 0 }, }; static struct _Hints html_hints = { "HTML", html_hint_array, sizeof(html_hint_array) / sizeof(*html_hint_array) }; Hints ox_hints_html() { return &html_hints; } Hint ox_hint_find(Hints hints, const char *name) { if (0 != hints) { Hint lo = hints->hints; Hint hi = hints->hints + hints->size - 1; Hint mid; int res; if (0 == (res = strcasecmp(name, lo->name))) { return lo; } else if (0 > res) { return 0; } if (0 == (res = strcasecmp(name, hi->name))) { return hi; } else if (0 < res) { return 0; } while (1 < hi - lo) { mid = lo + (hi - lo) / 2; if (0 == (res = strcasecmp(name, mid->name))) { return mid; } else if (0 < res) { lo = mid; } else { hi = mid; } } } return 0; } ox-2.1.1/ext/ox/extconf.rb0000644000004100000410000000566612311544775015440 0ustar www-datawww-datarequire 'mkmf' extension_name = 'ox' dir_config(extension_name) parts = RUBY_DESCRIPTION.split(' ') type = parts[0].downcase() type = 'ree' if 'ruby' == type && RUBY_DESCRIPTION.include?('Ruby Enterprise Edition') is_windows = RbConfig::CONFIG['host_os'] =~ /(mingw|mswin)/ platform = RUBY_PLATFORM version = RUBY_VERSION.split('.') puts ">>>>> Creating Makefile for #{type} version #{RUBY_VERSION} on #{platform} <<<<<" dflags = { 'RUBY_TYPE' => type, (type.upcase + '_RUBY') => nil, 'RUBY_VERSION' => RUBY_VERSION, 'RUBY_VERSION_MAJOR' => version[0], 'RUBY_VERSION_MINOR' => version[1], 'RUBY_VERSION_MICRO' => version[2], 'HAS_RB_TIME_TIMESPEC' => ('ruby' == type && ('1.9.3' == RUBY_VERSION)) ? 1 : 0, #'HAS_RB_TIME_TIMESPEC' => ('ruby' == type && ('1.9.3' == RUBY_VERSION || '2' <= version[0])) ? 
1 : 0, 'HAS_TM_GMTOFF' => ('ruby' == type && (('1' == version[0] && '9' == version[1]) || '2' <= version[0]) && !(platform.include?('cygwin') || platform.include?('solaris') || platform.include?('linux') || RUBY_PLATFORM =~ /(win|w)32$/)) ? 1 : 0, 'HAS_ENCODING_SUPPORT' => (('ruby' == type || 'rubinius' == type || 'macruby' == type) && (('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0, 'HAS_ONIG' => (('ruby' == type || 'jruby' == type || 'rubinius' == type) && (('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0, 'HAS_PRIVATE_ENCODING' => ('jruby' == type && '1' == version[0] && '9' == version[1]) ? 1 : 0, 'HAS_NANO_TIME' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0, 'HAS_RSTRUCT' => ('ruby' == type || 'ree' == type) ? 1 : 0, 'HAS_IVAR_HELPERS' => ('ruby' == type && !is_windows && (('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0, 'HAS_PROC_WITH_BLOCK' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0, 'HAS_GC_GUARD' => ('jruby' != type && 'rubinius' != type) ? 1 : 0, 'HAS_BIGDECIMAL' => ('jruby' != type) ? 1 : 0, 'HAS_TOP_LEVEL_ST_H' => ('ree' == type || ('ruby' == type && '1' == version[0] && '8' == version[1])) ? 1 : 0, 'NEEDS_UIO' => (RUBY_PLATFORM =~ /(win|w)32$/) ? 0 : 1, } if RUBY_PLATFORM =~ /(win|w)32$/ || RUBY_PLATFORM =~ /solaris2\.10/ dflags['NEEDS_STPCPY'] = nil end if ['i386-darwin10.0.0', 'x86_64-darwin10.8.0'].include? RUBY_PLATFORM dflags['NEEDS_STPCPY'] = nil dflags['HAS_IVAR_HELPERS'] = 0 if ('ruby' == type && '1.9.1' == RUBY_VERSION) elsif 'x86_64-linux' == RUBY_PLATFORM && '1.9.3' == RUBY_VERSION && '2011-10-30' == RUBY_RELEASE_DATE begin dflags['NEEDS_STPCPY'] = nil if `more /etc/issue`.include?('CentOS release 5.4') rescue Exception end end dflags.each do |k,v| if v.nil? 
$CPPFLAGS += " -D#{k}" else $CPPFLAGS += " -D#{k}=#{v}" end end $CPPFLAGS += ' -Wall' #puts "*** $CPPFLAGS: #{$CPPFLAGS}" create_makefile(extension_name) %x{make clean} ox-2.1.1/ext/ox/helper.h0000644000004100000410000000715112311544775015064 0ustar www-datawww-data/* helper.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef __OX_HELPER_H__ #define __OX_HELPER_H__ #include "type.h" #define HELPER_STACK_INC 16 typedef struct _Helper { ID var; /* Object var ID */ VALUE obj; /* object created or Qundef if not appropriate */ Type type; /* type of object in obj */ } *Helper; typedef struct _HelperStack { struct _Helper base[HELPER_STACK_INC]; Helper head; /* current stack */ Helper end; /* stack end */ Helper tail; /* pointer to one past last element name on stack */ } *HelperStack; inline static void helper_stack_init(HelperStack stack) { stack->head = stack->base; stack->end = stack->base + sizeof(stack->base) / sizeof(struct _Helper); stack->tail = stack->head; } inline static int helper_stack_empty(HelperStack stack) { return (stack->head == stack->tail); } inline static int helper_stack_depth(HelperStack stack) { return (int)(stack->tail - stack->head); } inline static void helper_stack_cleanup(HelperStack stack) { if (stack->base != stack->head) { xfree(stack->head); stack->head = stack->base; } } inline static Helper helper_stack_push(HelperStack stack, ID var, VALUE obj, Type type) { if (stack->end <= stack->tail) { size_t len = stack->end - stack->head; size_t toff = stack->tail - stack->head; if (stack->base == stack->head) { stack->head = ALLOC_N(struct _Helper, len + HELPER_STACK_INC); memcpy(stack->head, stack->base, sizeof(struct _Helper) * len); } else { REALLOC_N(stack->head, struct _Helper, len + HELPER_STACK_INC); } stack->tail = stack->head + toff; stack->end = stack->head + len + HELPER_STACK_INC; } stack->tail->var = var; stack->tail->obj = obj; stack->tail->type = type; stack->tail++; return stack->tail - 1; } inline static Helper helper_stack_peek(HelperStack stack) { if (stack->head < stack->tail) { return stack->tail - 1; } return 0; } inline static Helper helper_stack_pop(HelperStack stack) { if (stack->head < stack->tail) { stack->tail--; return stack->tail; } return 0; } #endif /* __OX_HELPER_H__ */ 
ox-2.1.1/ext/ox/sax_as.c0000644000004100000410000001537612311544775015066 0ustar www-datawww-data/* sax_as.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #if NEEDS_UIO #include #endif #include #include #include "ruby.h" #include "ox.h" #include "sax.h" static VALUE parse_double_time(const char *text) { long v = 0; long v2 = 0; const char *dot = 0; char c; for (; '.' 
!= *text; text++) { c = *text; if (c < '0' || '9' < c) { return Qnil; } v = 10 * v + (long)(c - '0'); } dot = text++; for (; '\0' != *text && text - dot <= 6; text++) { c = *text; if (c < '0' || '9' < c) { return Qnil; } v2 = 10 * v2 + (long)(c - '0'); } for (; text - dot <= 9; text++) { v2 *= 10; } #if HAS_NANO_TIME return rb_time_nano_new(v, v2); #else return rb_time_new(v, v2 / 1000); #endif } typedef struct _Tp { int cnt; char end; char alt; } *Tp; static VALUE parse_xsd_time(const char *text) { long cargs[10]; long *cp = cargs; long v; int i; char c = '\0'; struct _Tp tpa[10] = { { 4, '-', '-' }, { 2, '-', '-' }, { 2, 'T', ' ' }, { 2, ':', ':' }, { 2, ':', ':' }, { 2, '.', '.' }, { 9, '+', '-' }, { 2, ':', ':' }, { 2, '\0', '\0' }, { 0, '\0', '\0' } }; Tp tp = tpa; struct tm tm; memset(cargs, 0, sizeof(cargs)); for (; 0 != tp->cnt; tp++) { for (i = tp->cnt, v = 0; 0 < i ; text++, i--) { c = *text; if (c < '0' || '9' < c) { if ('\0' == c || tp->end == c || tp->alt == c) { break; } return Qnil; } v = 10 * v + (long)(c - '0'); } if ('\0' == c) { break; } c = *text++; if (tp->end != c && tp->alt != c) { return Qnil; } *cp++ = v; } tm.tm_year = (int)cargs[0] - 1900; tm.tm_mon = (int)cargs[1] - 1; tm.tm_mday = (int)cargs[2]; tm.tm_hour = (int)cargs[3]; tm.tm_min = (int)cargs[4]; tm.tm_sec = (int)cargs[5]; #if HAS_NANO_TIME return rb_time_nano_new(mktime(&tm), cargs[6]); #else return rb_time_new(mktime(&tm), cargs[6] / 1000); #endif } static VALUE sax_value_as_s(VALUE self) { SaxDrive dr = DATA_PTR(self); VALUE rs; if ('\0' == *dr->buf.str) { return Qnil; } if (dr->options.convert_special) { ox_sax_collapse_special(dr, dr->buf.str, dr->buf.line, dr->buf.col); } rs = rb_str_new2(dr->buf.str); #if HAS_ENCODING_SUPPORT if (0 != dr->encoding) { rb_enc_associate(rs, dr->encoding); } #elif HAS_PRIVATE_ENCODING if (Qnil != dr->encoding) { rb_funcall(rs, ox_force_encoding_id, 1, dr->encoding); } #endif return rs; } static VALUE sax_value_as_sym(VALUE self) { SaxDrive dr = 
DATA_PTR(self); if ('\0' == *dr->buf.str) { return Qnil; } return str2sym(dr, dr->buf.str, 0); } static VALUE sax_value_as_f(VALUE self) { SaxDrive dr = DATA_PTR(self); if ('\0' == *dr->buf.str) { return Qnil; } return rb_float_new(strtod(dr->buf.str, 0)); } static VALUE sax_value_as_i(VALUE self) { SaxDrive dr = DATA_PTR(self); const char *s = dr->buf.str; long n = 0; int neg = 0; if ('\0' == *s) { return Qnil; } if ('-' == *s) { neg = 1; s++; } else if ('+' == *s) { s++; } for (; '\0' != *s; s++) { if ('0' <= *s && *s <= '9') { n = n * 10 + (*s - '0'); } else { rb_raise(ox_arg_error_class, "Not a valid Fixnum.\n"); } } if (neg) { n = -n; } return LONG2NUM(n); } static VALUE sax_value_as_time(VALUE self) { SaxDrive dr = DATA_PTR(self); const char *str = dr->buf.str; VALUE t; if ('\0' == *str) { return Qnil; } if (Qnil == (t = parse_double_time(str)) && Qnil == (t = parse_xsd_time(str))) { VALUE args[1]; /*printf("**** time parse\n"); */ *args = rb_str_new2(str); t = rb_funcall2(ox_time_class, ox_parse_id, 1, args); } return t; } static VALUE sax_value_as_bool(VALUE self) { return (0 == strcasecmp("true", ((SaxDrive)DATA_PTR(self))->buf.str)) ? Qtrue : Qfalse; } static VALUE sax_value_empty(VALUE self) { return ('\0' == *((SaxDrive)DATA_PTR(self))->buf.str) ? 
Qtrue : Qfalse; } void ox_sax_define() { VALUE sax_module = rb_const_get_at(Ox, rb_intern("Sax")); ox_sax_value_class = rb_define_class_under(sax_module, "Value", rb_cObject); rb_define_method(ox_sax_value_class, "as_s", sax_value_as_s, 0); rb_define_method(ox_sax_value_class, "as_sym", sax_value_as_sym, 0); rb_define_method(ox_sax_value_class, "as_i", sax_value_as_i, 0); rb_define_method(ox_sax_value_class, "as_f", sax_value_as_f, 0); rb_define_method(ox_sax_value_class, "as_time", sax_value_as_time, 0); rb_define_method(ox_sax_value_class, "as_bool", sax_value_as_bool, 0); rb_define_method(ox_sax_value_class, "empty?", sax_value_empty, 0); } ox-2.1.1/ext/ox/sax_hint.h0000644000004100000410000000375712311544775015432 0ustar www-datawww-data/* hint.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_HINT_H__ #define __OX_HINT_H__ typedef struct _Hint { const char *name; char empty; // must be closed or close auto it, not error char nest; // nesting allowed const char **parents; } *Hint; typedef struct _Hints { const char *name; Hint hints; // array of hints int size; } *Hints; extern Hints ox_hints_html(void); extern Hint ox_hint_find(Hints hints, const char *name); #endif /* __OX_HINT_H__ */ ox-2.1.1/ext/ox/base64.h0000644000004100000410000000360212311544775014666 0ustar www-datawww-data/* base64.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __BASE64_H__ #define __BASE64_H__ typedef unsigned char uchar; #define b64_size(len) ((len + 2) / 3 * 4) extern unsigned long b64_orig_size(const char *text); extern void to_base64(const uchar *src, int len, char *b64); extern void from_base64(const char *b64, uchar *str); #endif /* __BASE64_H__ */ ox-2.1.1/ext/ox/ox.c0000644000004100000410000010262512311544775014230 0ustar www-datawww-data/* ox.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include "ruby.h" #include "ox.h" #include "sax.h" /* maximum to allocate on the stack, arbitrary limit */ #define SMALL_XML 4096 typedef struct _YesNoOpt { VALUE sym; char *attr; } *YesNoOpt; void Init_ox(); VALUE Ox = Qnil; ID ox_at_column_id; ID ox_at_content_id; ID ox_at_id; ID ox_at_line_id; ID ox_at_value_id; ID ox_attr_id; ID ox_attr_value_id; ID ox_attributes_id; ID ox_attrs_done_id; ID ox_beg_id; ID ox_cdata_id; ID ox_comment_id; ID ox_den_id; ID ox_doctype_id; ID ox_end_element_id; ID ox_end_id; ID ox_end_instruct_id; ID ox_error_id; ID ox_excl_id; ID ox_external_encoding_id; ID ox_fileno_id; ID ox_force_encoding_id; ID ox_inspect_id; ID ox_instruct_id; ID ox_jd_id; ID ox_keys_id; ID ox_local_id; ID ox_mesg_id; ID ox_message_id; ID ox_nodes_id; ID ox_new_id; ID ox_num_id; ID ox_parse_id; ID ox_read_id; ID ox_readpartial_id; ID ox_start_element_id; ID ox_string_id; ID ox_text_id; ID ox_to_c_id; ID ox_to_s_id; ID ox_to_sym_id; ID ox_tv_nsec_id; ID ox_tv_sec_id; ID ox_tv_usec_id; ID ox_value_id; VALUE ox_encoding_sym; VALUE ox_empty_string; VALUE ox_zero_fixnum; VALUE ox_arg_error_class; VALUE ox_bag_clas; VALUE ox_bigdecimal_class; VALUE 
ox_cdata_clas; VALUE ox_comment_clas; VALUE ox_date_class; VALUE ox_doctype_clas; VALUE ox_document_clas; VALUE ox_element_clas; VALUE ox_instruct_clas; VALUE ox_parse_error_class; VALUE ox_stringio_class; VALUE ox_struct_class; VALUE ox_time_class; Cache ox_symbol_cache = 0; Cache ox_class_cache = 0; Cache ox_attr_cache = 0; static VALUE auto_define_sym; static VALUE auto_sym; static VALUE circular_sym; static VALUE convert_special_sym; static VALUE effort_sym; static VALUE generic_sym; static VALUE indent_sym; static VALUE limited_sym; static VALUE mode_sym; static VALUE object_sym; static VALUE opt_format_sym; static VALUE optimized_sym; static VALUE strict_sym; static VALUE smart_sym; static VALUE symbolize_keys_sym; static VALUE symbolize_sym; static VALUE tolerant_sym; static VALUE trace_sym; static VALUE with_dtd_sym; static VALUE with_instruct_sym; static VALUE with_xml_sym; static VALUE xsd_date_sym; #if HAS_ENCODING_SUPPORT rb_encoding *ox_utf8_encoding = 0; #elif HAS_PRIVATE_ENCODING VALUE ox_utf8_encoding = Qnil; #else void *ox_utf8_encoding = 0; #endif struct _Options ox_default_options = { { '\0' }, /* encoding */ 2, /* indent */ 0, /* trace */ No, /* with_dtd */ No, /* with_xml */ No, /* with_instruct */ No, /* circular */ No, /* xsd_date */ NoMode, /* mode */ StrictEffort, /* effort */ Yes, /* sym_keys */ #if HAS_PRIVATE_ENCODING Qnil /* rb_enc */ #else 0 /* rb_enc */ #endif }; extern ParseCallbacks ox_obj_callbacks; extern ParseCallbacks ox_gen_callbacks; extern ParseCallbacks ox_limited_callbacks; extern ParseCallbacks ox_nomode_callbacks; static void parse_dump_options(VALUE ropts, Options copts); static char* defuse_bom(char *xml, Options options) { switch ((uint8_t)*xml) { case 0xEF: /* UTF-8 */ if (0xBB == (uint8_t)xml[1] && 0xBF == (uint8_t)xml[2]) { options->rb_enc = ox_utf8_encoding; xml += 3; } else { rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n"); } break; #if 0 case 0xFE: /* UTF-16BE */ if (0xFF == (uint8_t)xml[1]) { 
options->rb_enc = ox_utf16be_encoding; xml += 2; } else { rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n"); } break; case 0xFF: /* UTF-16LE or UTF-32LE */ if (0xFE == (uint8_t)xml[1]) { if (0x00 == (uint8_t)xml[2] && 0x00 == (uint8_t)xml[3]) { options->rb_enc = ox_utf32le_encoding; xml += 4; } else { options->rb_enc = ox_utf16le_encoding; xml += 2; } } else { rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n"); } break; case 0x00: /* UTF-32BE */ if (0x00 == (uint8_t)xml[1] && 0xFE == (uint8_t)xml[2] && 0xFF == (uint8_t)xml[3]) { options->rb_enc = ox_utf32be_encoding; xml += 4; } else { rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n"); } break; #endif default: /* Let it fail if there is a BOM that is not UTF-8. Other BOM options are not ASCII compatible. */ break; } return xml; } /* call-seq: ox_default_options() => Hash * * Returns the default load and dump options as a Hash. The options are * - indent: [Fixnum] number of spaces to indent each element in an XML document * - trace: [Fixnum] trace level where 0 is silent * - encoding: [String] character encoding for the XML file * - with_dtd: [true|false|nil] include DTD in the dump * - with_instruct: [true|false|nil] include instructions in the dump * - with_xml: [true|false|nil] include XML prolog in the dump * - circular: [true|false|nil] support circular references while dumping * - xsd_date: [true|false|nil] use XSD date format instead of decimal format * - mode: [:object|:generic|:limited|nil] load method to use for XML * - effort: [:strict|:tolerant|:auto_define] set the tolerance level for loading * - symbolize_keys: [true|false|nil] symbolize element attribute keys or leave as Strings * @return [Hash] all current option settings. * * Note that an indent of less than zero will result in a tight one line output * unless the text in the XML fields contain new line characters. 
*/
static VALUE
get_def_opts(VALUE self) {
    /* The Yes/No/NotSet options in the order they are added to the Hash. */
    const struct {
        VALUE key;
        char  flag;
    } tri_states[] = {
        { with_dtd_sym,       ox_default_options.with_dtd },
        { with_xml_sym,       ox_default_options.with_xml },
        { with_instruct_sym,  ox_default_options.with_instruct },
        { circular_sym,       ox_default_options.circular },
        { xsd_date_sym,       ox_default_options.xsd_date },
        { symbolize_keys_sym, ox_default_options.sym_keys },
    };
    const char *enc = ox_default_options.encoding;
    int        enc_len = (int)strlen(enc);
    VALUE      result = rb_hash_new();
    VALUE      mode_value;
    VALUE      effort_value;
    size_t     i;

    /* an empty encoding is reported as nil */
    if (0 == enc_len) {
        rb_hash_aset(result, ox_encoding_sym, Qnil);
    } else {
        rb_hash_aset(result, ox_encoding_sym, rb_str_new(enc, enc_len));
    }
    rb_hash_aset(result, indent_sym, INT2FIX(ox_default_options.indent));
    rb_hash_aset(result, trace_sym, INT2FIX(ox_default_options.trace));

    /* Yes maps to true, No to false and anything else to nil */
    for (i = 0; i < sizeof(tri_states) / sizeof(*tri_states); i++) {
        VALUE flag_value = Qnil;

        if (Yes == tri_states[i].flag) {
            flag_value = Qtrue;
        } else if (No == tri_states[i].flag) {
            flag_value = Qfalse;
        }
        rb_hash_aset(result, tri_states[i].key, flag_value);
    }

    switch (ox_default_options.mode) {
    case ObjMode: mode_value = object_sym;  break;
    case GenMode: mode_value = generic_sym; break;
    case LimMode: mode_value = limited_sym; break;
    case NoMode:
    default:      mode_value = Qnil;        break;
    }
    rb_hash_aset(result, mode_sym, mode_value);

    switch (ox_default_options.effort) {
    case StrictEffort:   effort_value = strict_sym;      break;
    case TolerantEffort: effort_value = tolerant_sym;    break;
    case AutoEffort:     effort_value = auto_define_sym; break;
    case NoEffort:
    default:             effort_value = Qnil;            break;
    }
    rb_hash_aset(result, effort_sym, effort_value);

    return result;
}

/* call-seq: ox_default_options=(opts)
 *
 * Sets the default options for load and dump.
* @param [Hash] opts options to change * @param [Fixnum] :indent number of spaces to indent each element in an XML document * @param [Fixnum] :trace trace level where 0 is silent * @param [String] :encoding character encoding for the XML file * @param [true|false|nil] :with_dtd include DTD in the dump * @param [true|false|nil] :with_instruct include instructions in the dump * @param [true|false|nil] :with_xml include XML prolog in the dump * @param [true|false|nil] :circular support circular references while dumping * @param [true|false|nil] :xsd_date use XSD date format instead of decimal format * @param [:object|:generic|:limited|nil] :mode load method to use for XML * @param [:strict|:tolerant|:auto_define] :effort set the tolerance level for loading * @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings * @return [nil] */ static VALUE set_def_opts(VALUE self, VALUE opts) { struct _YesNoOpt ynos[] = { { with_xml_sym, &ox_default_options.with_xml }, { with_dtd_sym, &ox_default_options.with_dtd }, { with_instruct_sym, &ox_default_options.with_instruct }, { xsd_date_sym, &ox_default_options.xsd_date }, { circular_sym, &ox_default_options.circular }, { symbolize_keys_sym, &ox_default_options.sym_keys }, { Qnil, 0 } }; YesNoOpt o; VALUE v; Check_Type(opts, T_HASH); v = rb_hash_aref(opts, ox_encoding_sym); if (Qnil == v) { *ox_default_options.encoding = '\0'; } else { Check_Type(v, T_STRING); strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1); #if HAS_ENCODING_SUPPORT ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding); #elif HAS_PRIVATE_ENCODING ox_default_options.rb_enc = rb_str_new2(ox_default_options.encoding); rb_gc_register_address(&ox_default_options.rb_enc); #endif } v = rb_hash_aref(opts, indent_sym); if (Qnil != v) { Check_Type(v, T_FIXNUM); ox_default_options.indent = FIX2INT(v); } v = rb_hash_aref(opts, trace_sym); if (Qnil != v) { Check_Type(v, 
T_FIXNUM); ox_default_options.trace = FIX2INT(v); } v = rb_hash_aref(opts, mode_sym); if (Qnil == v) { ox_default_options.mode = NoMode; } else if (object_sym == v) { ox_default_options.mode = ObjMode; } else if (generic_sym == v) { ox_default_options.mode = GenMode; } else if (limited_sym == v) { ox_default_options.mode = LimMode; } else { rb_raise(ox_parse_error_class, ":mode must be :object, :generic, :limited, or nil.\n"); } v = rb_hash_aref(opts, effort_sym); if (Qnil == v) { ox_default_options.effort = NoEffort; } else if (strict_sym == v) { ox_default_options.effort = StrictEffort; } else if (tolerant_sym == v) { ox_default_options.effort = TolerantEffort; } else if (auto_define_sym == v) { ox_default_options.effort = AutoEffort; } else { rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n"); } for (o = ynos; 0 != o->attr; o++) { v = rb_hash_lookup(opts, o->sym); if (Qnil == v) { *o->attr = NotSet; } else if (Qtrue == v) { *o->attr = Yes; } else if (Qfalse == v) { *o->attr = No; } else { rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym))); } } return Qnil; } /* call-seq: parse_obj(xml) => Object * * Parses an XML document String that is in the object format and returns an * Object of the type represented by the XML. This function expects an * optimized XML formated String. For other formats use the more generic * Ox.load() method. Raises an exception if the XML is malformed or the * classes specified in the file are not valid. * @param [String] xml XML String in optimized Object format. * @return [Object] deserialized Object. 
*/ static VALUE to_obj(VALUE self, VALUE ruby_xml) { char *xml, *x; size_t len; VALUE obj; struct _Options options = ox_default_options; struct _Err err; err_init(&err); Check_Type(ruby_xml, T_STRING); /* the xml string gets modified so make a copy of it */ len = RSTRING_LEN(ruby_xml) + 1; x = defuse_bom(StringValuePtr(ruby_xml), &options); if (SMALL_XML < len) { xml = ALLOC_N(char, len); } else { xml = ALLOCA_N(char, len); } memcpy(xml, x, len); #if HAS_GC_GUARD rb_gc_disable(); #endif obj = ox_parse(xml, ox_obj_callbacks, 0, &options, &err); if (SMALL_XML < len) { xfree(xml); } #if HAS_GC_GUARD RB_GC_GUARD(obj); rb_gc_enable(); #endif if (err_has(&err)) { ox_err_raise(&err); } return obj; } /* call-seq: parse(xml) => Ox::Document or Ox::Element * * Parses and XML document String into an Ox::Document or Ox::Element. * @param [String] xml XML String * @return [Ox::Document or Ox::Element] parsed XML document. * @raise [Exception] if the XML is malformed. */ static VALUE to_gen(VALUE self, VALUE ruby_xml) { char *xml, *x; size_t len; VALUE obj; struct _Options options = ox_default_options; struct _Err err; err_init(&err); Check_Type(ruby_xml, T_STRING); /* the xml string gets modified so make a copy of it */ len = RSTRING_LEN(ruby_xml) + 1; x = defuse_bom(StringValuePtr(ruby_xml), &options); if (SMALL_XML < len) { xml = ALLOC_N(char, len); } else { xml = ALLOCA_N(char, len); } memcpy(xml, x, len); obj = ox_parse(xml, ox_gen_callbacks, 0, &options, &err); if (SMALL_XML < len) { xfree(xml); } if (err_has(&err)) { ox_err_raise(&err); } return obj; } static VALUE load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) { VALUE obj; struct _Options options = ox_default_options; if (1 == argc && rb_cHash == rb_obj_class(*argv)) { VALUE h = *argv; VALUE v; if (Qnil != (v = rb_hash_lookup(h, mode_sym))) { if (object_sym == v) { options.mode = ObjMode; } else if (optimized_sym == v) { options.mode = ObjMode; } else if (generic_sym == v) { options.mode = 
GenMode; } else if (limited_sym == v) { options.mode = LimMode; } else { rb_raise(ox_parse_error_class, ":mode must be :generic, :object, or :limited.\n"); } } if (Qnil != (v = rb_hash_lookup(h, effort_sym))) { if (auto_define_sym == v) { options.effort = AutoEffort; } else if (tolerant_sym == v) { options.effort = TolerantEffort; } else if (strict_sym == v) { options.effort = StrictEffort; } else { rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n"); } } if (Qnil != (v = rb_hash_lookup(h, trace_sym))) { Check_Type(v, T_FIXNUM); options.trace = FIX2INT(v); } if (Qnil != (v = rb_hash_lookup(h, symbolize_keys_sym))) { options.sym_keys = (Qfalse == v) ? No : Yes; } } #if HAS_ENCODING_SUPPORT if ('\0' == *options.encoding) { if (Qnil != encoding) { options.rb_enc = rb_enc_from_index(rb_enc_get_index(encoding)); } else { options.rb_enc = 0; } } else if (0 == options.rb_enc) { options.rb_enc = rb_enc_find(options.encoding); } #elif HAS_PRIVATE_ENCODING if ('\0' == *options.encoding) { if (Qnil != encoding) { options.rb_enc = encoding; } else { options.rb_enc = Qnil; } } else if (0 == options.rb_enc) { options.rb_enc = rb_str_new2(options.encoding); rb_gc_register_address(&options.rb_enc); } #endif xml = defuse_bom(xml, &options); switch (options.mode) { case ObjMode: #if HAS_GC_GUARD rb_gc_disable(); #endif obj = ox_parse(xml, ox_obj_callbacks, 0, &options, err); #if HAS_GC_GUARD RB_GC_GUARD(obj); rb_gc_enable(); #endif break; case GenMode: obj = ox_parse(xml, ox_gen_callbacks, 0, &options, err); break; case LimMode: obj = ox_parse(xml, ox_limited_callbacks, 0, &options, err); break; case NoMode: obj = ox_parse(xml, ox_nomode_callbacks, 0, &options, err); break; default: obj = ox_parse(xml, ox_gen_callbacks, 0, &options, err); break; } return obj; } /* call-seq: load(xml, options) => Ox::Document or Ox::Element or Object * * Parses and XML document String into an Ox::Document, or Ox::Element, or * Object depending on the options. 
Raises an exception if the XML is * malformed or the classes specified are not valid. * @param [String] xml XML String * @param [Hash] options load options * @param [:object|:generic|:limited] :mode format expected * - *:object* - object format * - *:generic* - read as a generic XML file * - *:limited* - read as a generic XML file but with callbacks on text and elements events only * @param [:strict|:tolerant|:auto_define] :effort effort to use when an undefined class is encountered, default: :strict * - *:strict* - raise an NameError for missing classes and modules * - *:tolerant* - return nil for missing classes and modules * - *:auto_define* - auto define missing classes and modules * @param [Fixnum] :trace trace level as a Fixnum, default: 0 (silent) * @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings */ static VALUE load_str(int argc, VALUE *argv, VALUE self) { char *xml; size_t len; VALUE obj; VALUE encoding; struct _Err err; err_init(&err); Check_Type(*argv, T_STRING); /* the xml string gets modified so make a copy of it */ len = RSTRING_LEN(*argv) + 1; if (SMALL_XML < len) { xml = ALLOC_N(char, len); } else { xml = ALLOCA_N(char, len); } #if HAS_ENCODING_SUPPORT #ifdef MACRUBY_RUBY encoding = rb_funcall(*argv, rb_intern("encoding"), 0); #else encoding = rb_obj_encoding(*argv); #endif #elif HAS_PRIVATE_ENCODING encoding = rb_funcall(*argv, rb_intern("encoding"), 0); #else encoding = Qnil; #endif memcpy(xml, StringValuePtr(*argv), len); obj = load(xml, argc - 1, argv + 1, self, encoding, &err); if (SMALL_XML < len) { xfree(xml); } if (err_has(&err)) { ox_err_raise(&err); } return obj; } /* call-seq: load_file(file_path, options) => Ox::Document or Ox::Element or Object * * Parses and XML document from a file into an Ox::Document, or Ox::Element, * or Object depending on the options. Raises an exception if the XML is * malformed or the classes specified are not valid. 
* @param [String] file_path file path to read the XML document from * @param [Hash] options load options * @param [:object|:generic|:limited] :mode format expected * - *:object* - object format * - *:generic* - read as a generic XML file * - *:limited* - read as a generic XML file but with callbacks on text and elements events only * @param [:strict|:tolerant|:auto_define] :effort effort to use when an undefined class is encountered, default: :strict * - *:strict* - raise an NameError for missing classes and modules * - *:tolerant* - return nil for missing classes and modules * - *:auto_define* - auto define missing classes and modules * @param [Fixnum] :trace trace level as a Fixnum, default: 0 (silent) * @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings */ static VALUE load_file(int argc, VALUE *argv, VALUE self) { char *path; char *xml; FILE *f; size_t len; VALUE obj; struct _Err err; err_init(&err); Check_Type(*argv, T_STRING); path = StringValuePtr(*argv); if (0 == (f = fopen(path, "r"))) { rb_raise(rb_eIOError, "%s\n", strerror(errno)); } fseek(f, 0, SEEK_END); len = ftell(f); if (SMALL_XML < len) { xml = ALLOC_N(char, len + 1); } else { xml = ALLOCA_N(char, len + 1); } fseek(f, 0, SEEK_SET); if (len != fread(xml, 1, len, f)) { ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path); obj = Qnil; } else { xml[len] = '\0'; obj = load(xml, argc - 1, argv + 1, self, Qnil, &err); } fclose(f); if (SMALL_XML < len) { xfree(xml); } if (err_has(&err)) { ox_err_raise(&err); } return obj; } /* call-seq: sax_parse(handler, io, options) * * Parses an IO stream or file containing an XML document. Raises an exception * if the XML is malformed or the classes specified are not valid. 
* @param [Ox::Sax] handler SAX (responds to OX::Sax methods) like handler * @param [IO|String] io IO Object to read from * @param [Hash] options parse options * @param [true|false] :convert_special flag indicating special characters like < are converted * @param [true|false] :symbolize flag indicating the parser symbolize element and attribute names * @param [true|false] :smart flag indicating the parser use hints if available (use with html) */ static VALUE sax_parse(int argc, VALUE *argv, VALUE self) { struct _SaxOptions options; options.symbolize = 1; options.convert_special = 0; options.smart = 0; if (argc < 2) { rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n"); } if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) { VALUE h = argv[2]; VALUE v; if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) { options.convert_special = (Qtrue == v); } if (Qnil != (v = rb_hash_lookup(h, smart_sym))) { options.smart = (Qtrue == v); } if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) { options.symbolize = (Qtrue == v); } } ox_sax_parse(argv[0], argv[1], &options); return Qnil; } static void parse_dump_options(VALUE ropts, Options copts) { struct _YesNoOpt ynos[] = { { with_xml_sym, &copts->with_xml }, { with_dtd_sym, &copts->with_dtd }, { with_instruct_sym, &copts->with_instruct }, { xsd_date_sym, &copts->xsd_date }, { circular_sym, &copts->circular }, { Qnil, 0 } }; YesNoOpt o; if (rb_cHash == rb_obj_class(ropts)) { VALUE v; if (Qnil != (v = rb_hash_lookup(ropts, indent_sym))) { if (rb_cFixnum != rb_obj_class(v)) { rb_raise(ox_parse_error_class, ":indent must be a Fixnum.\n"); } copts->indent = NUM2INT(v); } if (Qnil != (v = rb_hash_lookup(ropts, trace_sym))) { if (rb_cFixnum != rb_obj_class(v)) { rb_raise(ox_parse_error_class, ":trace must be a Fixnum.\n"); } copts->trace = NUM2INT(v); } if (Qnil != (v = rb_hash_lookup(ropts, ox_encoding_sym))) { if (rb_cString != rb_obj_class(v)) { rb_raise(ox_parse_error_class, ":encoding must be a 
String.\n"); } strncpy(copts->encoding, StringValuePtr(v), sizeof(copts->encoding) - 1); } if (Qnil != (v = rb_hash_lookup(ropts, effort_sym))) { if (auto_define_sym == v) { copts->effort = AutoEffort; } else if (tolerant_sym == v) { copts->effort = TolerantEffort; } else if (strict_sym == v) { copts->effort = StrictEffort; } else { rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n"); } } for (o = ynos; 0 != o->attr; o++) { if (Qnil != (v = rb_hash_lookup(ropts, o->sym))) { VALUE c = rb_obj_class(v); if (rb_cTrueClass == c) { *o->attr = Yes; } else if (rb_cFalseClass == c) { *o->attr = No; } else { rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym))); } } } } } /* call-seq: dump(obj, options) => xml-string * * Dumps an Object (obj) to a string. * @param [Object] obj Object to serialize as an XML document String * @param [Hash] options formating options * @param [Fixnum] :indent format expected * @param [true|false] :xsd_date use XSD date format if true, default: false * @param [true|false] :circular allow circular references, default: false * @param [:strict|:tolerant] :effort effort to use when an undumpable object (e.g., IO) is encountered, default: :strict * - *:strict* - raise an NotImplementedError if an undumpable object is encountered * - *:tolerant* - replaces undumplable objects with nil * * Note that an indent of less than zero will result in a tight one line output * unless the text in the XML fields contain new line characters. 
*/ static VALUE dump(int argc, VALUE *argv, VALUE self) { char *xml; struct _Options copts = ox_default_options; VALUE rstr; if (2 == argc) { parse_dump_options(argv[1], &copts); } if (0 == (xml = ox_write_obj_to_str(*argv, &copts))) { rb_raise(rb_eNoMemError, "Not enough memory.\n"); } rstr = rb_str_new2(xml); #if HAS_ENCODING_SUPPORT if ('\0' != *copts.encoding) { rb_enc_associate(rstr, rb_enc_find(copts.encoding)); } #elif HAS_PRIVATE_ENCODING if ('\0' != *copts.encoding) { rb_funcall(rstr, ox_force_encoding_id, 1, rb_str_new2(copts.encoding)); } #endif xfree(xml); return rstr; } /* call-seq: to_file(file_path, obj, options) * * Dumps an Object to the specified file. * @param [String] file_path file path to write the XML document to * @param [Object] obj Object to serialize as an XML document String * @param [Hash] options formating options * @param [Fixnum] :indent format expected * @param [true|false] :xsd_date use XSD date format if true, default: false * @param [true|false] :circular allow circular references, default: false * @param [:strict|:tolerant] :effort effort to use when an undumpable object (e.g., IO) is encountered, default: :strict * - *:strict* - raise an NotImplementedError if an undumpable object is encountered * - *:tolerant* - replaces undumplable objects with nil * * Note that an indent of less than zero will result in a tight one line output * unless the text in the XML fields contain new line characters. 
*/ static VALUE to_file(int argc, VALUE *argv, VALUE self) { struct _Options copts = ox_default_options; if (3 == argc) { parse_dump_options(argv[2], &copts); } Check_Type(*argv, T_STRING); ox_write_obj_to_file(argv[1], StringValuePtr(*argv), &copts); return Qnil; } extern void ox_cache_test(void); static VALUE cache_test(VALUE self) { ox_cache_test(); return Qnil; } extern void ox_cache8_test(void); static VALUE cache8_test(VALUE self) { ox_cache8_test(); return Qnil; } void Init_ox() { Ox = rb_define_module("Ox"); rb_define_module_function(Ox, "default_options", get_def_opts, 0); rb_define_module_function(Ox, "default_options=", set_def_opts, 1); rb_define_module_function(Ox, "parse_obj", to_obj, 1); rb_define_module_function(Ox, "parse", to_gen, 1); rb_define_module_function(Ox, "load", load_str, -1); rb_define_module_function(Ox, "sax_parse", sax_parse, -1); rb_define_module_function(Ox, "to_xml", dump, -1); rb_define_module_function(Ox, "dump", dump, -1); rb_define_module_function(Ox, "load_file", load_file, -1); rb_define_module_function(Ox, "to_file", to_file, -1); rb_require("time"); rb_require("date"); rb_require("bigdecimal"); rb_require("stringio"); ox_at_column_id = rb_intern("@column"); ox_at_content_id = rb_intern("@content"); ox_at_id = rb_intern("at"); ox_at_line_id = rb_intern("@line"); ox_at_value_id = rb_intern("@value"); ox_attr_id = rb_intern("attr"); ox_attr_value_id = rb_intern("attr_value"); ox_attributes_id = rb_intern("@attributes"); ox_attrs_done_id = rb_intern("attrs_done"); ox_beg_id = rb_intern("@beg"); ox_cdata_id = rb_intern("cdata"); ox_comment_id = rb_intern("comment"); ox_den_id = rb_intern("@den"); ox_doctype_id = rb_intern("doctype"); ox_end_element_id = rb_intern("end_element"); ox_end_id = rb_intern("@end"); ox_end_instruct_id = rb_intern("end_instruct"); ox_error_id = rb_intern("error"); ox_excl_id = rb_intern("@excl"); ox_external_encoding_id = rb_intern("external_encoding"); ox_fileno_id = rb_intern("fileno"); 
ox_force_encoding_id = rb_intern("force_encoding"); ox_inspect_id = rb_intern("inspect"); ox_instruct_id = rb_intern("instruct"); ox_jd_id = rb_intern("jd"); ox_keys_id = rb_intern("keys"); ox_local_id = rb_intern("local"); ox_mesg_id = rb_intern("mesg"); ox_message_id = rb_intern("message"); ox_nodes_id = rb_intern("@nodes"); ox_new_id = rb_intern("new"); ox_num_id = rb_intern("@num"); ox_parse_id = rb_intern("parse"); ox_read_id = rb_intern("read"); ox_readpartial_id = rb_intern("readpartial"); ox_start_element_id = rb_intern("start_element"); ox_string_id = rb_intern("string"); ox_text_id = rb_intern("text"); ox_to_c_id = rb_intern("to_c"); ox_to_s_id = rb_intern("to_s"); ox_to_sym_id = rb_intern("to_sym"); ox_tv_nsec_id = rb_intern("tv_nsec"); ox_tv_sec_id = rb_intern("tv_sec"); ox_tv_usec_id = rb_intern("tv_usec"); ox_value_id = rb_intern("value"); rb_require("ox/version"); rb_require("ox/error"); rb_require("ox/hasattrs"); rb_require("ox/node"); rb_require("ox/comment"); rb_require("ox/instruct"); rb_require("ox/cdata"); rb_require("ox/doctype"); rb_require("ox/element"); rb_require("ox/document"); rb_require("ox/bag"); rb_require("ox/sax"); ox_time_class = rb_const_get(rb_cObject, rb_intern("Time")); ox_date_class = rb_const_get(rb_cObject, rb_intern("Date")); ox_parse_error_class = rb_const_get_at(Ox, rb_intern("ParseError")); ox_arg_error_class = rb_const_get_at(Ox, rb_intern("ArgError")); ox_struct_class = rb_const_get(rb_cObject, rb_intern("Struct")); ox_stringio_class = rb_const_get(rb_cObject, rb_intern("StringIO")); ox_bigdecimal_class = rb_const_get(rb_cObject, rb_intern("BigDecimal")); auto_define_sym = ID2SYM(rb_intern("auto_define")); rb_gc_register_address(&auto_define_sym); auto_sym = ID2SYM(rb_intern("auto")); rb_gc_register_address(&auto_sym); circular_sym = ID2SYM(rb_intern("circular")); rb_gc_register_address(&circular_sym); convert_special_sym = ID2SYM(rb_intern("convert_special")); rb_gc_register_address(&convert_special_sym); effort_sym = 
ID2SYM(rb_intern("effort")); rb_gc_register_address(&effort_sym); generic_sym = ID2SYM(rb_intern("generic")); rb_gc_register_address(&generic_sym); indent_sym = ID2SYM(rb_intern("indent")); rb_gc_register_address(&indent_sym); limited_sym = ID2SYM(rb_intern("limited")); rb_gc_register_address(&limited_sym); mode_sym = ID2SYM(rb_intern("mode")); rb_gc_register_address(&mode_sym); object_sym = ID2SYM(rb_intern("object")); rb_gc_register_address(&object_sym); opt_format_sym = ID2SYM(rb_intern("opt_format")); rb_gc_register_address(&opt_format_sym); optimized_sym = ID2SYM(rb_intern("optimized")); rb_gc_register_address(&optimized_sym); ox_encoding_sym = ID2SYM(rb_intern("encoding")); rb_gc_register_address(&ox_encoding_sym); smart_sym = ID2SYM(rb_intern("smart")); rb_gc_register_address(&smart_sym); strict_sym = ID2SYM(rb_intern("strict")); rb_gc_register_address(&strict_sym); symbolize_keys_sym = ID2SYM(rb_intern("symbolize_keys")); rb_gc_register_address(&symbolize_keys_sym); symbolize_sym = ID2SYM(rb_intern("symbolize")); rb_gc_register_address(&symbolize_sym); tolerant_sym = ID2SYM(rb_intern("tolerant")); rb_gc_register_address(&tolerant_sym); trace_sym = ID2SYM(rb_intern("trace")); rb_gc_register_address(&trace_sym); with_dtd_sym = ID2SYM(rb_intern("with_dtd")); rb_gc_register_address(&with_dtd_sym); with_instruct_sym = ID2SYM(rb_intern("with_instructions")); rb_gc_register_address(&with_instruct_sym); with_xml_sym = ID2SYM(rb_intern("with_xml")); rb_gc_register_address(&with_xml_sym); xsd_date_sym = ID2SYM(rb_intern("xsd_date")); rb_gc_register_address(&xsd_date_sym); ox_empty_string = rb_str_new2(""); rb_gc_register_address(&ox_empty_string); ox_zero_fixnum = INT2NUM(0); rb_gc_register_address(&ox_zero_fixnum); ox_document_clas = rb_const_get_at(Ox, rb_intern("Document")); ox_element_clas = rb_const_get_at(Ox, rb_intern("Element")); ox_instruct_clas = rb_const_get_at(Ox, rb_intern("Instruct")); ox_comment_clas = rb_const_get_at(Ox, rb_intern("Comment")); 
ox_doctype_clas = rb_const_get_at(Ox, rb_intern("DocType")); ox_cdata_clas = rb_const_get_at(Ox, rb_intern("CData")); ox_bag_clas = rb_const_get_at(Ox, rb_intern("Bag")); ox_cache_new(&ox_symbol_cache); ox_cache_new(&ox_class_cache); ox_cache_new(&ox_attr_cache); ox_sax_define(); rb_define_module_function(Ox, "cache_test", cache_test, 0); rb_define_module_function(Ox, "cache8_test", cache8_test, 0); #if HAS_ENCODING_SUPPORT ox_utf8_encoding = rb_enc_find("UTF-8"); #elif HAS_PRIVATE_ENCODING ox_utf8_encoding = rb_str_new2("UTF-8"); rb_gc_register_address(&ox_utf8_encoding); #endif } void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line) { int xline = 1; int col = 1; for (; xml < current && '\n' != *current; current--) { col++; } for (; xml < current; current--) { if ('\n' == *current) { xline++; } } #if HAS_GC_GUARD rb_gc_enable(); #endif rb_raise(ox_parse_error_class, "%s at line %d, column %d [%s:%d]\n", msg, xline, col, file, line); } ox-2.1.1/ext/ox/sax_stack.h0000644000004100000410000000634212311544775015566 0ustar www-datawww-data/* sax_stack.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_SAX_STACK_H__ #define __OX_SAX_STACK_H__ #include "sax_hint.h" #define STACK_INC 32 typedef struct _Nv { const char *name; VALUE val; Hint hint; } *Nv; typedef struct _NStack { struct _Nv base[STACK_INC]; Nv head; /* current stack */ Nv end; /* stack end */ Nv tail; /* pointer to one past last element name on stack */ } *NStack; inline static void stack_init(NStack stack) { stack->head = stack->base; stack->end = stack->base + sizeof(stack->base) / sizeof(struct _Nv); stack->tail = stack->head; } inline static int stack_empty(NStack stack) { return (stack->head == stack->tail); } inline static void stack_cleanup(NStack stack) { if (stack->base != stack->head) { xfree(stack->head); } } inline static void stack_push(NStack stack, const char *name, VALUE val, Hint hint) { if (stack->end <= stack->tail) { size_t len = stack->end - stack->head; size_t toff = stack->tail - stack->head; if (stack->base == stack->head) { stack->head = ALLOC_N(struct _Nv, len + STACK_INC); memcpy(stack->head, stack->base, sizeof(struct _Nv) * len); } else { REALLOC_N(stack->head, struct _Nv, len + STACK_INC); } stack->tail = stack->head + toff; stack->end = stack->head + len + STACK_INC; } stack->tail->name = 
name; stack->tail->val = val; stack->tail->hint = hint; stack->tail++; } inline static Nv stack_peek(NStack stack) { if (stack->head < stack->tail) { return stack->tail - 1; } return 0; } inline static Nv stack_pop(NStack stack) { if (stack->head < stack->tail) { stack->tail--; return stack->tail; } return 0; } #endif /* __OX_SAX_STACK_H__ */ ox-2.1.1/ext/ox/sax_has.h0000644000004100000410000000643712311544775015241 0ustar www-datawww-data/* sax_has.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef __OX_SAX_HAS_H__ #define __OX_SAX_HAS_H__ typedef struct _Has { int instruct; int end_instruct; int attr; int attrs_done; int attr_value; int doctype; int comment; int cdata; int text; int value; int start_element; int end_element; int error; int line; int column; } *Has; inline static int respond_to(VALUE obj, ID method) { #ifdef JRUBY_RUBY /* There is a bug in JRuby where rb_respond_to() returns true (1) even if * a method is private. */ { VALUE args[1]; *args = ID2SYM(method); return (Qtrue == rb_funcall2(obj, rb_intern("respond_to?"), 1, args)); } #else return rb_respond_to(obj, method); #endif } inline static void has_init(Has has, VALUE handler) { has->instruct = respond_to(handler, ox_instruct_id); has->end_instruct = respond_to(handler, ox_end_instruct_id); has->attr = respond_to(handler, ox_attr_id); has->attr_value = respond_to(handler, ox_attr_value_id); has->attrs_done = respond_to(handler, ox_attrs_done_id); has->doctype = respond_to(handler, ox_doctype_id); has->comment = respond_to(handler, ox_comment_id); has->cdata = respond_to(handler, ox_cdata_id); has->text = respond_to(handler, ox_text_id); has->value = respond_to(handler, ox_value_id); has->start_element = respond_to(handler, ox_start_element_id); has->end_element = respond_to(handler, ox_end_element_id); has->error = respond_to(handler, ox_error_id); has->line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id)); has->column = (Qtrue == rb_ivar_defined(handler, ox_at_column_id)); } #endif /* __OX_SAX_HAS_H__ */ ox-2.1.1/ext/ox/special.h0000644000004100000410000000332512311544775015224 0ustar www-datawww-data/* special.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. 
* * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_SPECIAL_H__ #define __OX_SPECIAL_H__ #include extern char* ox_ucs_to_utf8_chars(char *text, uint64_t u); #endif /* __OX_SPECIAL_H__ */ ox-2.1.1/ext/ox/err.h0000644000004100000410000000437412311544775014401 0ustar www-datawww-data/* err.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_ERR_H__ #define __OX_ERR_H__ #include "ruby.h" #define set_error(err, msg, xml, current) _ox_err_set_with_location(err, msg, xml, current, __FILE__, __LINE__) typedef struct _Err { VALUE clas; char msg[128]; } *Err; extern VALUE ox_arg_error_class; extern VALUE ox_parse_error_class; extern void ox_err_set(Err e, VALUE clas, const char *format, ...); extern void _ox_err_set_with_location(Err err, const char *msg, const char *xml, const char *current, const char* file, int line); extern void ox_err_raise(Err e); inline static void err_init(Err e) { e->clas = Qnil; *e->msg = '\0'; } inline static int err_has(Err e) { return (Qnil != e->clas); } #endif /* __OX_ERR_H__ */ ox-2.1.1/ext/ox/err.c0000644000004100000410000000442212311544775014366 0ustar www-datawww-data/* err.c * Copyright (c) 2011, Peter Ohler * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "err.h" void ox_err_set(Err e, VALUE clas, const char *format, ...) 
{ va_list ap; va_start(ap, format); e->clas = clas; vsnprintf(e->msg, sizeof(e->msg) - 1, format, ap); va_end(ap); } void ox_err_raise(Err e) { rb_raise(e->clas, "%s", e->msg); } void _ox_err_set_with_location(Err err, const char *msg, const char *xml, const char *current, const char* file, int line) { int xline = 1; int col = 1; for (; xml < current && '\n' != *current; current--) { col++; } for (; xml < current; current--) { if ('\n' == *current) { xline++; } } ox_err_set(err, ox_parse_error_class, "%s at line %d, column %d [%s:%d]\n", msg, xline, col, file, line); } ox-2.1.1/ext/ox/cache8.c0000644000004100000410000000400012311544775014721 0ustar www-datawww-data #include #include #include #include #include #include "ruby.h" #include "cache8.h" #define BITS 4 #define MASK 0x000000000000000FULL #define SLOT_CNT 16 #define DEPTH 16 typedef union { struct _Cache8 *child; slot_t value; } Bucket; struct _Cache8 { Bucket buckets[SLOT_CNT]; }; static void cache8_delete(Cache8 cache, int depth); static void slot_print(Cache8 cache, sid_t key, unsigned int depth); void ox_cache8_new(Cache8 *cache) { Bucket *b; int i; *cache = ALLOC(struct _Cache8); for (i = SLOT_CNT, b = (*cache)->buckets; 0 < i; i--, b++) { b->value = 0; } } void ox_cache8_delete(Cache8 cache) { cache8_delete(cache, 0); } static void cache8_delete(Cache8 cache, int depth) { Bucket *b; unsigned int i; for (i = 0, b = cache->buckets; i < SLOT_CNT; i++, b++) { if (0 != b->child) { if (DEPTH - 1 != depth) { cache8_delete(b->child, depth + 1); } } } xfree(cache); } slot_t ox_cache8_get(Cache8 cache, sid_t key, slot_t **slot) { Bucket *b; int i; sid_t k8 = (sid_t)key; sid_t k; for (i = 64 - BITS; 0 < i; i -= BITS) { k = (k8 >> i) & MASK; b = cache->buckets + k; if (0 == b->child) { ox_cache8_new(&b->child); } cache = b->child; } *slot = &(cache->buckets + (k8 & MASK))->value; return **slot; } void ox_cache8_print(Cache8 cache) { /*printf("-------------------------------------------\n"); */ slot_print(cache, 0, 
0); } static void slot_print(Cache8 c, sid_t key, unsigned int depth) { Bucket *b; unsigned int i; sid_t k8 = (sid_t)key; sid_t k; for (i = 0, b = c->buckets; i < SLOT_CNT; i++, b++) { if (0 != b->child) { k = (k8 << BITS) | i; /*printf("*** key: 0x%016llx depth: %u i: %u\n", k, depth, i); */ if (DEPTH - 1 == depth) { printf("0x%016llx: %4llu\n", (unsigned long long)k, (unsigned long long)b->value); } else { slot_print(b->child, k, depth + 1); } } } } ox-2.1.1/ext/ox/cache_test.c0000644000004100000410000000514712311544775015705 0ustar www-datawww-data/* cache_test.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "cache.h" static const char *data[] = { #if 1 "one", "two", "one", "onex", "oney", "one", "tw", "onexyzabcdefgh", #else "abc", "abcd", "ab", "a", "abcdefghijklmnop", #endif 0 }; void ox_cache_test() { Cache c; const char **d; VALUE v; VALUE *slot = 0;; ox_cache_new(&c); for (d = data; 0 != *d; d++) { /*printf("*** cache_get on %s\n", *d);*/ v = ox_cache_get(c, *d, &slot, 0); if (Qundef == v) { if (0 == slot) { /*printf("*** failed to get a slot for %s\n", *d); */ } else { /*printf("*** added '%s' to cache\n", *d); */ v = ID2SYM(rb_intern(*d)); *slot = v; } } else { VALUE rs = rb_funcall2(v, rb_intern("to_s"), 0, 0); printf("*** get on '%s' returned '%s' (%s)\n", *d, StringValuePtr(rs), rb_class2name(rb_obj_class(v))); } /*ox_cache_print(c);*/ } ox_cache_print(c); } ox-2.1.1/ext/ox/cache8_test.c0000644000004100000410000000507512311544775015775 0ustar www-datawww-data/* cache8_test.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "cache8.h" static slot_t data[] = { 0x000000A0A0A0A0A0ULL, 0x0000000000ABCDEFULL, 0x0123456789ABCDEFULL, 0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000003ULL, 0x0000000000000004ULL, 0 }; void ox_cache8_test() { Cache8 c; slot_t v; slot_t *d; slot_t cnt = 1; slot_t *slot = 0; ox_cache8_new(&c); for (d = data; 0 != *d; d++) { v = ox_cache8_get(c, *d, &slot); if (0 == v) { if (0 == slot) { printf("*** failed to get a slot for 0x%016llx\n", (unsigned long long)*d); } else { printf("*** adding 0x%016llx to cache with value %llu\n", (unsigned long long)*d, (unsigned long long)cnt); *slot = cnt++; } } else { printf("*** get on 0x%016llx returned %llu\n", (unsigned long long)*d, (unsigned long long)v); } /*ox_cache8_print(c); */ } ox_cache8_print(c); } ox-2.1.1/ext/ox/cache.h0000644000004100000410000000354312311544775014651 0ustar www-datawww-data/* cache.h * Copyright (c) 2011, Peter Ohler * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_CACHE_H__ #define __OX_CACHE_H__ #include "ruby.h" typedef struct _Cache *Cache; extern void ox_cache_new(Cache *cache); extern VALUE ox_cache_get(Cache cache, const char *key, VALUE **slot, char **keyp); extern void ox_cache_print(Cache cache); #endif /* __OX_CACHE_H__ */ ox-2.1.1/ext/ox/type.h0000644000004100000410000000462312311544775014567 0ustar www-datawww-data/* type.h * Copyright (c) 2011, Peter Ohler * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef __OX_TYPE_H__ #define __OX_TYPE_H__ typedef enum { NoCode = 0, ArrayCode = 'a', String64Code = 'b', /* base64 encoded String */ ClassCode = 'c', Symbol64Code = 'd', /* base64 encoded Symbol */ DateCode = 'D', BigDecimalCode = 'B', ExceptionCode = 'e', FloatCode = 'f', RegexpCode = 'g', HashCode = 'h', FixnumCode = 'i', BignumCode = 'j', KeyCode = 'k', /* indicates the value is a hash key, kind of a hack */ RationalCode = 'l', SymbolCode = 'm', FalseClassCode = 'n', ObjectCode = 'o', RefCode = 'p', RangeCode = 'r', StringCode = 's', TimeCode = 't', StructCode = 'u', ComplexCode = 'v', RawCode = 'x', TrueClassCode = 'y', NilClassCode = 'z', } Type; #endif /* __OX_TYPE_H__ */ ox-2.1.1/ext/ox/ox.h0000644000004100000410000001620712311544775014235 0ustar www-datawww-data/* ox.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_H__ #define __OX_H__ #if defined(__cplusplus) extern "C" { #if 0 } /* satisfy cc-mode */ #endif #endif #define RSTRING_NOT_MODIFIED #include "ruby.h" #if HAS_ENCODING_SUPPORT #include "ruby/encoding.h" #endif #ifdef RUBINIUS_RUBY #undef T_COMPLEX enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK}; #else #if HAS_TOP_LEVEL_ST_H /* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. 
*/ #include "st.h" #else #include "ruby/st.h" #endif #endif #include "cache.h" #include "err.h" #include "type.h" #include "attr.h" #include "helper.h" #define raise_error(msg, xml, current) _ox_raise_error(msg, xml, current, __FILE__, __LINE__) #define MAX_TEXT_LEN 4096 #define SILENT 0 #define TRACE 1 #define DEBUG 2 #define XSD_DATE 0x0001 #define WITH_XML 0x0002 #define WITH_INST 0x0004 #define WITH_DTD 0x0008 #define CIRCULAR 0x0010 #define XSD_DATE_SET 0x0100 #define WITH_XML_SET 0x0200 #define WITH_INST_SET 0x0400 #define WITH_DTD_SET 0x0800 #define CIRCULAR_SET 0x1000 typedef enum { UseObj = 1, UseAttr = 2, UseAttrSet = 3, UseArray = 4, UseAMember = 5, UseHash = 6, UseHashKey = 7, UseHashVal = 8, UseRange = 9, UseRangeAttr= 10, UseRaw = 11, } Use; typedef enum { StrictEffort = 's', TolerantEffort = 't', AutoEffort = 'a', NoEffort = 0, } Effort; typedef enum { Yes = 'y', No = 'n', NotSet = 0 } YesNo; typedef enum { ObjMode = 'o', GenMode = 'g', LimMode = 'l', NoMode = 0 } LoadMode; typedef struct _PInfo *PInfo; typedef struct _ParseCallbacks { void (*instruct)(PInfo pi, const char *target, Attr attrs, const char *content); void (*add_doctype)(PInfo pi, const char *docType); void (*add_comment)(PInfo pi, const char *comment); void (*add_cdata)(PInfo pi, const char *cdata, size_t len); void (*add_text)(PInfo pi, char *text, int closed); void (*add_element)(PInfo pi, const char *ename, Attr attrs, int hasChildren); void (*end_element)(PInfo pi, const char *ename); } *ParseCallbacks; typedef struct _CircArray { VALUE obj_array[1024]; VALUE *objs; unsigned long size; /* allocated size or initial array size */ unsigned long cnt; } *CircArray; typedef struct _Options { char encoding[64]; /* encoding, stored in the option to avoid GC invalidation in default values */ int indent; /* indention for dump, default 2 */ int trace; /* trace level */ char with_dtd; /* YesNo */ char with_xml; /* YesNo */ char with_instruct; /* YesNo */ char circular; /* YesNo */ char 
xsd_date; /* YesNo */ char mode; /* LoadMode */ char effort; /* Effort */ char sym_keys; /* symbolize keys */ #if HAS_ENCODING_SUPPORT rb_encoding *rb_enc; #elif HAS_PRIVATE_ENCODING VALUE rb_enc; #else void *rb_enc; #endif } *Options; /* parse information structure */ struct _PInfo { struct _HelperStack helpers; struct _Err err; char *str; /* buffer being read from */ char *s; /* current position in buffer */ VALUE obj; ParseCallbacks pcb; CircArray circ_array; unsigned long id; /* set for text types when cirs_array is set */ Options options; char last; /* last character read, rarely set */ }; extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err); extern void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line); extern void ox_sax_define(void); extern char* ox_write_obj_to_str(VALUE obj, Options copts); extern void ox_write_obj_to_file(VALUE obj, const char *path, Options copts); extern struct _Options ox_default_options; extern VALUE Ox; extern ID ox_at_column_id; extern ID ox_at_content_id; extern ID ox_at_id; extern ID ox_at_line_id; extern ID ox_at_value_id; extern ID ox_attr_id; extern ID ox_attr_value_id; extern ID ox_attrs_done_id; extern ID ox_attributes_id; extern ID ox_beg_id; extern ID ox_cdata_id; extern ID ox_comment_id; extern ID ox_den_id; extern ID ox_doctype_id; extern ID ox_end_element_id; extern ID ox_end_id; extern ID ox_end_instruct_id; extern ID ox_error_id; extern ID ox_excl_id; extern ID ox_external_encoding_id; extern ID ox_fileno_id; extern ID ox_force_encoding_id; extern ID ox_inspect_id; extern ID ox_instruct_id; extern ID ox_jd_id; extern ID ox_keys_id; extern ID ox_local_id; extern ID ox_mesg_id; extern ID ox_message_id; extern ID ox_nodes_id; extern ID ox_new_id; extern ID ox_num_id; extern ID ox_parse_id; extern ID ox_read_id; extern ID ox_readpartial_id; extern ID ox_start_element_id; extern ID ox_string_id; extern ID ox_text_id; extern ID 
ox_to_c_id; extern ID ox_to_s_id; extern ID ox_to_sym_id; extern ID ox_tv_sec_id; extern ID ox_tv_nsec_id; extern ID ox_tv_usec_id; extern ID ox_value_id; #if HAS_ENCODING_SUPPORT extern rb_encoding *ox_utf8_encoding; #elif HAS_PRIVATE_ENCODING extern VALUE ox_utf8_encoding; #else extern void *ox_utf8_encoding; #endif extern VALUE ox_bigdecimal_class; extern VALUE ox_date_class; extern VALUE ox_empty_string; extern VALUE ox_encoding_sym; extern VALUE ox_stringio_class; extern VALUE ox_struct_class; extern VALUE ox_time_class; extern VALUE ox_zero_fixnum; extern VALUE ox_document_clas; extern VALUE ox_element_clas; extern VALUE ox_instruct_clas; extern VALUE ox_bag_clas; extern VALUE ox_comment_clas; extern VALUE ox_doctype_clas; extern VALUE ox_cdata_clas; extern Cache ox_symbol_cache; extern Cache ox_class_cache; extern Cache ox_attr_cache; #if defined(__cplusplus) #if 0 { /* satisfy cc-mode */ #endif } /* extern "C" { */ #endif #endif /* __OX_H__ */ ox-2.1.1/ext/ox/sax.c0000644000004100000410000010570312311544775014375 0ustar www-datawww-data/* sax.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #if NEEDS_UIO #include #endif #include #include #include "ruby.h" #include "ox.h" #include "sax.h" #include "sax_stack.h" #include "sax_buf.h" #include "special.h" #define NAME_MISMATCH 1 #define START_STATE 1 #define BODY_STATE 2 #define AFTER_STATE 3 // error prefixes #define BAD_BOM "Bad BOM: " #define NO_TERM "Not Terminated: " #define INVALID_FORMAT "Invalid Format: " #define CASE_ERROR "Case Error: " #define OUT_OF_ORDER "Out of Order: " #define WRONG_CHAR "Unexpected Character: " #define EL_MISMATCH "Start End Mismatch: " #define INV_ELEMENT "Invalid Element: " #define UTF8_STR "UTF-8" static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options); static void parse(SaxDrive dr); // All read functions should return the next character after the 'thing' that was read and leave dr->cur one after that. 
static char read_instruction(SaxDrive dr); static char read_doctype(SaxDrive dr); static char read_cdata(SaxDrive dr); static char read_comment(SaxDrive dr); static char read_element_start(SaxDrive dr); static char read_element_end(SaxDrive dr); static char read_text(SaxDrive dr); static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req); static char read_name_token(SaxDrive dr); static char read_quoted_value(SaxDrive dr); static void end_element_cb(SaxDrive dr, VALUE name, int line, int col); static void hint_clear_empty(SaxDrive dr); static Nv hint_try_close(SaxDrive dr, const char *name); VALUE ox_sax_value_class = Qnil; static VALUE protect_parse(VALUE drp) { parse((SaxDrive)drp); return Qnil; } void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) { struct _SaxDrive dr; int line = 0; sax_drive_init(&dr, handler, io, options); #if 0 printf("*** sax_parse with these flags\n"); printf(" has_instruct = %s\n", dr.has.instruct ? "true" : "false"); printf(" has_end_instruct = %s\n", dr.has.end_instruct ? "true" : "false"); printf(" has_attr = %s\n", dr.has.attr ? "true" : "false"); printf(" has_attr_value = %s\n", dr.has.attr_value ? "true" : "false"); printf(" has_attrs_done = %s\n", dr.has.attrs_done ? "true" : "false"); printf(" has_doctype = %s\n", dr.has.doctype ? "true" : "false"); printf(" has_comment = %s\n", dr.has.comment ? "true" : "false"); printf(" has_cdata = %s\n", dr.has.cdata ? "true" : "false"); printf(" has_text = %s\n", dr.has.text ? "true" : "false"); printf(" has_value = %s\n", dr.has.value ? "true" : "false"); printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false"); printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false"); printf(" has_error = %s\n", dr.has.error ? "true" : "false"); printf(" has_line = %s\n", dr.has.line ? "true" : "false"); printf(" has_column = %s\n", dr.has.column ? 
"true" : "false"); #endif //parse(&dr); rb_protect(protect_parse, (VALUE)&dr, &line); ox_sax_drive_cleanup(&dr); if (0 != line) { rb_jump_tag(line); } } static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) { ox_sax_buf_init(&dr->buf, io); dr->buf.dr = dr; stack_init(&dr->stack); dr->handler = handler; dr->value_obj = rb_data_object_alloc(ox_sax_value_class, dr, 0, 0); rb_gc_register_address(&dr->value_obj); dr->options = *options; dr->hints = 0; dr->err = 0; has_init(&dr->has, handler); #if HAS_ENCODING_SUPPORT if ('\0' == *ox_default_options.encoding) { VALUE encoding; dr->encoding = 0; if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) { int e = rb_enc_get_index(encoding); if (0 <= e) { dr->encoding = rb_enc_from_index(e); } } } else { dr->encoding = rb_enc_find(ox_default_options.encoding); } #elif HAS_PRIVATE_ENCODING if ('\0' == *ox_default_options.encoding) { VALUE encoding; if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) { dr->encoding = encoding; } else { dr->encoding = Qnil; } } else { dr->encoding = rb_str_new2(ox_default_options.encoding); } #else dr->encoding = 0; #endif } void ox_sax_drive_cleanup(SaxDrive dr) { rb_gc_unregister_address(&dr->value_obj); buf_cleanup(&dr->buf); stack_cleanup(&dr->stack); } static void ox_sax_drive_error_at(SaxDrive dr, const char *msg, int line, int col) { if (dr->has.error) { VALUE args[3]; args[0] = rb_str_new2(msg); args[1] = LONG2NUM(line); args[2] = LONG2NUM(col); if (dr->has.line) { rb_ivar_set(dr->handler, ox_at_line_id, args[1]); } if (dr->has.column) { rb_ivar_set(dr->handler, ox_at_column_id, args[2]); } rb_funcall2(dr->handler, ox_error_id, 3, args); } } void ox_sax_drive_error(SaxDrive dr, const char *msg) { ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col); } static char skipBOM(SaxDrive dr) { char c = buf_get(&dr->buf); if (0xEF == 
(uint8_t)c) { /* only UTF8 is supported */ if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) { #if HAS_ENCODING_SUPPORT dr->encoding = ox_utf8_encoding; #elif HAS_PRIVATE_ENCODING dr->encoding = ox_utf8_encoding; #else dr->encoding = UTF8_STR; #endif c = buf_get(&dr->buf); } else { ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file."); c = '\0'; } } return c; } static void parse(SaxDrive dr) { char c = skipBOM(dr); int state = START_STATE; while ('\0' != c) { buf_protect(&dr->buf); if (is_white(c) && '\0' == (c = buf_next_non_white(&dr->buf))) { break; } if ('<' == c) { c = buf_get(&dr->buf); switch (c) { case '?': /* instructions (xml or otherwise) */ c = read_instruction(dr); break; case '!': /* comment or doctype */ buf_protect(&dr->buf); c = buf_get(&dr->buf); if ('\0' == c) { ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated"); goto DONE; } else if ('-' == c) { c = buf_get(&dr->buf); /* skip first - and get next character */ if ('-' != c) { ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected ", 4, out); } else if (ox_cdata_clas == clas) { dump_gen_val_node(*np, d2, "", 3, out); } else if (ox_doctype_clas == clas) { dump_gen_val_node(*np, d2, "", 2, out); } else { rb_raise(rb_eTypeError, "Unexpected class, %s, while dumping generic XML\n", rb_class2name(clas)); } } } return indent_needed; } static int dump_gen_attr(VALUE key, VALUE value, Out out) { const char *ks; size_t klen; size_t size; #if HAS_PRIVATE_ENCODING // There seems to be a bug in jruby for converting symbols to strings and preserving the encoding. This is a work // around. 
ks = rb_str_ptr(rb_String(key)); #else switch (rb_type(key)) { case T_SYMBOL: ks = rb_id2name(SYM2ID(key)); break; case T_STRING: ks = StringValuePtr(key); break; default: key = rb_String(key); ks = StringValuePtr(key); break; } #endif klen = strlen(ks); value = rb_String(value); size = 4 + klen + RSTRING_LEN(value); if (out->end - out->cur <= (long)size) { grow(out, size); } *out->cur++ = ' '; fill_value(out, ks, klen); *out->cur++ = '='; *out->cur++ = '"'; dump_str_value(out, StringValuePtr(value), RSTRING_LEN(value)); *out->cur++ = '"'; return ST_CONTINUE; } static void dump_gen_val_node(VALUE obj, int depth, const char *pre, size_t plen, const char *suf, size_t slen, Out out) { volatile VALUE v = rb_attr_get(obj, ox_at_value_id); const char *val; size_t vlen; size_t size; int indent; if (T_STRING != rb_type(v)) { return; } val = StringValuePtr(v); vlen = RSTRING_LEN(v); if (0 > out->indent) { indent = -1; } else if (0 == out->indent) { indent = 0; } else { indent = depth * out->indent; } size = indent + plen + slen + vlen; if (out->end - out->cur <= (long)size) { grow(out, size); } fill_indent(out, indent); fill_value(out, pre, plen); fill_value(out, val, vlen); fill_value(out, suf, slen); *out->cur = '\0'; } static void dump_obj_to_xml(VALUE obj, Options copts, Out out) { VALUE clas = rb_obj_class(obj); out->w_time = (Yes == copts->xsd_date) ? 
dump_time_xsd : dump_time_thin; out->buf = ALLOC_N(char, 65336); out->end = out->buf + 65325; /* 10 less than end plus extra for possible errors */ out->cur = out->buf; out->circ_cache = 0; out->circ_cnt = 0; out->opts = copts; out->obj = obj; if (Yes == copts->circular) { ox_cache8_new(&out->circ_cache); } out->indent = copts->indent; if (ox_document_clas == clas) { dump_gen_doc(obj, -1, out); } else if (ox_element_clas == clas) { dump_gen_element(obj, 0, out); } else { out->w_start = dump_start; out->w_end = dump_end; dump_first_obj(obj, out); } dump_value(out, "\n", 1); if (Yes == copts->circular) { ox_cache8_delete(out->circ_cache); } } char* ox_write_obj_to_str(VALUE obj, Options copts) { struct _Out out; dump_obj_to_xml(obj, copts, &out); return out.buf; } void ox_write_obj_to_file(VALUE obj, const char *path, Options copts) { struct _Out out; size_t size; FILE *f; dump_obj_to_xml(obj, copts, &out); size = out.cur - out.buf; if (0 == (f = fopen(path, "w"))) { rb_raise(rb_eIOError, "%s\n", strerror(errno)); } if (size != fwrite(out.buf, 1, size, f)) { int err = ferror(f); rb_raise(rb_eIOError, "Write failed. [%d:%s]\n", err, strerror(err)); } xfree(out.buf); fclose(f); } ox-2.1.1/ext/ox/special.c0000644000004100000410000000653112311544775015221 0ustar www-datawww-data/* special.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "special.h" /* u0000..u007F 00000000000000xxxxxxx 0xxxxxxx u0080..u07FF 0000000000yyyyyxxxxxx 110yyyyy 10xxxxxx u0800..uD7FF, uE000..uFFFF 00000zzzzyyyyyyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx u10000..u10FFFF uuuzzzzzzyyyyyyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */ char* ox_ucs_to_utf8_chars(char *text, uint64_t u) { int reading = 0; int i; unsigned char c; if (u <= 0x000000000000007FULL) { /* 0xxxxxxx */ *text++ = (char)u; } else if (u <= 0x00000000000007FFULL) { /* 110yyyyy 10xxxxxx */ *text++ = (char)(0x00000000000000C0ULL | (0x000000000000001FULL & (u >> 6))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u)); } else if (u <= 0x000000000000D7FFULL || (0x000000000000E000ULL <= u && u <= 0x000000000000FFFFULL)) { /* 1110zzzz 10yyyyyy 10xxxxxx */ *text++ = (char)(0x00000000000000E0ULL | (0x000000000000000FULL & (u >> 12))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u)); } else if 
(0x0000000000010000ULL <= u && u <= 0x000000000010FFFFULL) { /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */ *text++ = (char)(0x00000000000000F0ULL | (0x0000000000000007ULL & (u >> 18))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 12))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6))); *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u)); } else { /* assume it is UTF-8 encoded directly and not UCS */ for (i = 56; 0 <= i; i -= 8) { c = (unsigned char)((u >> i) & 0x00000000000000FFULL); if (reading) { *text++ = (char)c; } else if ('\0' != c) { *text++ = (char)c; reading = 1; } } } return text; } ox-2.1.1/ext/ox/attr.h0000644000004100000410000000655512311544775014566 0ustar www-datawww-data/* attr.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __OX_ATTR_H__ #define __OX_ATTR_H__ #include "ox.h" #define ATTR_STACK_INC 8 typedef struct _Attr { const char *name; const char *value; } *Attr; typedef struct _AttrStack { struct _Attr base[ATTR_STACK_INC]; Attr head; /* current stack */ Attr end; /* stack end */ Attr tail; /* pointer to one past last element name on stack */ } *AttrStack; inline static void attr_stack_init(AttrStack stack) { stack->head = stack->base; stack->end = stack->base + sizeof(stack->base) / sizeof(struct _Attr); stack->tail = stack->head; stack->head->name = 0; } inline static int attr_stack_empty(AttrStack stack) { return (stack->head == stack->tail); } inline static void attr_stack_cleanup(AttrStack stack) { if (stack->base != stack->head) { xfree(stack->head); stack->head = stack->base; } } inline static void attr_stack_push(AttrStack stack, const char *name, const char *value) { if (stack->end <= stack->tail + 1) { size_t len = stack->end - stack->head; size_t toff = stack->tail - stack->head; if (stack->base == stack->head) { stack->head = ALLOC_N(struct _Attr, len + ATTR_STACK_INC); memcpy(stack->head, stack->base, sizeof(struct _Attr) * len); } else { REALLOC_N(stack->head, struct _Attr, len + ATTR_STACK_INC); } stack->tail = stack->head + toff; stack->end = stack->head + len + ATTR_STACK_INC; } stack->tail->name = name; stack->tail->value = value; stack->tail++; stack->tail->name = 0; // terminate } inline static Attr attr_stack_peek(AttrStack stack) 
{ if (stack->head < stack->tail) { return stack->tail - 1; } return 0; } inline static Attr attr_stack_pop(AttrStack stack) { if (stack->head < stack->tail) { stack->tail--; return stack->tail; } return 0; } #endif /* __OX_ATTR_H__ */ ox-2.1.1/ext/ox/encode.h0000644000004100000410000000371012311544775015037 0ustar www-datawww-data/* encode.h * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef __OX_ENCODE_H__ #define __OX_ENCODE_H__ #include "ruby.h" #if HAS_ENCODING_SUPPORT #include "ruby/encoding.h" #endif static inline VALUE ox_encode(VALUE rstr) { #if HAS_ENCODING_SUPPORT rb_enc_associate(rstr, ox_utf8_encoding); #else if (Qnil != ox_utf8_encoding) { rstr = rb_funcall(ox_utf8_encoding, ox_iconv_id, 1, rstr); } #endif return rstr; } #endif /* __OX_ENCODE_H__ */ ox-2.1.1/ext/ox/parse.c0000644000004100000410000006337512311544775014724 0ustar www-datawww-data/* parse.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include "ruby.h" #include "ox.h" #include "err.h" #include "attr.h" #include "helper.h" #include "special.h" static void read_instruction(PInfo pi); static void read_doctype(PInfo pi); static void read_comment(PInfo pi); static char* read_element(PInfo pi); static void read_text(PInfo pi); /*static void read_reduced_text(PInfo pi); */ static void read_cdata(PInfo pi); static char* read_name_token(PInfo pi); static char* read_quoted_value(PInfo pi); static char* read_hex_uint64(char *b, uint64_t *up); static char* read_10_uint64(char *b, uint64_t *up); static char* read_coded_chars(PInfo pi, char *text); static void next_non_white(PInfo pi); static int collapse_special(PInfo pi, char *str); /* This XML parser is a single pass, destructive, callback parser. It is a * single pass parse since it only make one pass over the characters in the * XML document string. It is destructive because it re-uses the content of * the string for values in the callback and places \0 characters at various * places to mark the end of tokens and strings. It is a callback parser like * a SAX parser because it uses callback when document elements are * encountered. * * Parsing is very tolerant. Lack of headers and even mispelled element * endings are passed over without raising an error. A best attempt is made in * all cases to parse the string. 
*/ inline static void next_non_white(PInfo pi) { for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '\n': case '\r': break; default: return; } } } inline static void next_white(PInfo pi) { for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '\n': case '\r': case '\0': return; default: break; } } } VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err) { struct _PInfo pi; int body_read = 0; if (0 == xml) { set_error(err, "Invalid arg, xml string can not be null", xml, 0); return Qnil; } if (DEBUG <= options->trace) { printf("Parsing xml:\n%s\n", xml); } /* initialize parse info */ helper_stack_init(&pi.helpers); err_init(&pi.err); pi.str = xml; pi.s = xml; pi.pcb = pcb; pi.obj = Qnil; pi.circ_array = 0; pi.options = options; while (1) { next_non_white(&pi); /* skip white space */ if ('\0' == *pi.s) { break; } if (body_read && 0 != endp) { *endp = pi.s; break; } if ('<' != *pi.s) { /* all top level entities start with < */ set_error(err, "invalid format, expected <", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; } pi.s++; /* past < */ switch (*pi.s) { case '?': /* processing instruction */ pi.s++; read_instruction(&pi); break; case '!': /* comment or doctype */ pi.s++; if ('\0' == *pi.s) { set_error(err, "invalid format, DOCTYPE or comment not terminated", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; } else if ('-' == *pi.s) { pi.s++; /* skip - */ if ('-' != *pi.s) { set_error(err, "invalid format, bad comment format", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; } else { pi.s++; /* skip second - */ read_comment(&pi); } } else if ((TolerantEffort == options->effort) ? 
0 == strncasecmp("DOCTYPE", pi.s, 7) : 0 == strncmp("DOCTYPE", pi.s, 7)) { pi.s += 7; read_doctype(&pi); } else { set_error(err, "invalid format, DOCTYPE or comment expected", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; } break; case '\0': set_error(err, "invalid format, document not terminated", pi.str, pi.s); helper_stack_cleanup(&pi.helpers); return Qnil; default: read_element(&pi); body_read = 1; break; } if (err_has(&pi.err)) { *err = pi.err; helper_stack_cleanup(&pi.helpers); return Qnil; } } helper_stack_cleanup(&pi.helpers); return pi.obj; } static char* gather_content(const char *src, char *content, size_t len) { for (; 0 < len; src++, content++, len--) { switch (*src) { case '?': if ('>' == *(src + 1)) { *content = '\0'; return (char*)(src + 1); } *content = *src; break; case '\0': return 0; default: *content = *src; break; } } return 0; } /* Entered after the "s; if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) { set_error(&pi->err, "processing instruction content too large or not terminated", pi->str, pi->s); return; } next_non_white(pi); c = *pi->s; *end = '\0'; /* terminate name */ if ('?' != c) { while ('?' != c) { pi->last = 0; if ('\0' == *pi->s) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, processing instruction not terminated", pi->str, pi->s); return; } next_non_white(pi); if (0 == (attr_name = read_name_token(pi))) { attr_stack_cleanup(&attrs); return; } end = pi->s; next_non_white(pi); if ('=' != *pi->s++) { attrs_ok = 0; break; } *end = '\0'; /* terminate name */ /* read value */ next_non_white(pi); if (0 == (attr_value = read_quoted_value(pi))) { attr_stack_cleanup(&attrs); return; } attr_stack_push(&attrs, attr_name, attr_value); next_non_white(pi); if ('\0' == pi->last) { c = *pi->s; } else { c = pi->last; } } if ('?' 
== *pi->s) { pi->s++; } } else { pi->s++; } if (attrs_ok) { if ('>' != *pi->s++) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, processing instruction not terminated", pi->str, pi->s); return; } } else { pi->s = cend + 1; } if (0 != pi->pcb->instruct) { if (attrs_ok) { pi->pcb->instruct(pi, target, attrs.head, 0); } else { pi->pcb->instruct(pi, target, attrs.head, content); } } attr_stack_cleanup(&attrs); } static void read_delimited(PInfo pi, char end) { char c; if ('"' == end || '\'' == end) { for (c = *pi->s++; end != c; c = *pi->s++) { if ('\0' == c) { set_error(&pi->err, "invalid format, dectype not terminated", pi->str, pi->s); return; } } } else { while (1) { c = *pi->s++; if (end == c) { return; } switch (c) { case '\0': set_error(&pi->err, "invalid format, dectype not terminated", pi->str, pi->s); return; case '"': read_delimited(pi, c); break; case '\'': read_delimited(pi, c); break; case '[': read_delimited(pi, ']'); break; case '<': read_delimited(pi, '>'); break; default: break; } } } } /* Entered after the "s; read_delimited(pi, '>'); if (err_has(&pi->err)) { return; } pi->s--; *pi->s = '\0'; pi->s++; if (0 != pi->pcb->add_doctype) { pi->pcb->add_doctype(pi, docType); } } /* Entered after ""); if (0 == end) { set_error(&pi->err, "invalid format, comment not terminated", pi->str, pi->s); return; } for (s = end - 1; pi->s < s && !done; s--) { switch(*s) { case ' ': case '\t': case '\f': case '\n': case '\r': break; default: *(s + 1) = '\0'; done = 1; break; } } *end = '\0'; /* in case the comment was blank */ pi->s = end + 3; if (0 != pi->pcb->add_comment) { pi->pcb->add_comment(pi, comment); } } /* Entered after the '<' and the first character after that. Returns status * code. 
*/ static char* read_element(PInfo pi) { struct _AttrStack attrs; const char *attr_name; const char *attr_value; char *name; char *ename; char *end; char c; long elen; int hasChildren = 0; int done = 0; attr_stack_init(&attrs); if (0 == (ename = read_name_token(pi))) { return 0; } end = pi->s; elen = end - ename; next_non_white(pi); c = *pi->s; *end = '\0'; if ('/' == c) { /* empty element, no attributes and no children */ pi->s++; if ('>' != *pi->s) { /*printf("*** '%s' ***\n", pi->s); */ attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s); return 0; } pi->s++; /* past > */ pi->pcb->add_element(pi, ename, attrs.head, hasChildren); pi->pcb->end_element(pi, ename); attr_stack_cleanup(&attrs); return 0; } /* read attribute names until the close (/ or >) is reached */ while (!done) { if ('\0' == c) { next_non_white(pi); c = *pi->s; } pi->last = 0; switch (c) { case '\0': attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; case '/': /* Element with just attributes. */ pi->s++; if ('>' != *pi->s) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s); return 0; } pi->s++; pi->pcb->add_element(pi, ename, attrs.head, hasChildren); pi->pcb->end_element(pi, ename); attr_stack_cleanup(&attrs); return 0; case '>': /* has either children or a value */ pi->s++; hasChildren = 1; done = 1; pi->pcb->add_element(pi, ename, attrs.head, hasChildren); break; default: /* Attribute name so it's an element and the attribute will be */ /* added to it. 
*/ if (0 == (attr_name = read_name_token(pi))) { attr_stack_cleanup(&attrs); return 0; } end = pi->s; next_non_white(pi); if ('=' != *pi->s++) { if (TolerantEffort == pi->options->effort) { pi->s--; pi->last = *pi->s; *end = '\0'; /* terminate name */ attr_value = ""; attr_stack_push(&attrs, attr_name, attr_value); break; } else { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, no attribute value", pi->str, pi->s); return 0; } } *end = '\0'; /* terminate name */ /* read value */ next_non_white(pi); if (0 == (attr_value = read_quoted_value(pi))) { return 0; } if (0 != strchr(attr_value, '&')) { if (0 != collapse_special(pi, (char*)attr_value)) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, special character does not end with a semicolon", pi->str, pi->s); return 0; } } attr_stack_push(&attrs, attr_name, attr_value); break; } if ('\0' == pi->last) { c = '\0'; } else { c = pi->last; pi->last = '\0'; } } if (hasChildren) { char *start; int first = 1; done = 0; /* read children */ while (!done) { start = pi->s; next_non_white(pi); c = *pi->s++; if ('\0' == c) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; } if ('<' == c) { char *slash; switch (*pi->s) { case '!': /* better be a comment or CDATA */ pi->s++; if ('-' == *pi->s && '-' == *(pi->s + 1)) { pi->s += 2; read_comment(pi); } else if ((TolerantEffort == pi->options->effort) ? 
0 == strncasecmp("[CDATA[", pi->s, 7) : 0 == strncmp("[CDATA[", pi->s, 7)) { pi->s += 7; read_cdata(pi); } else { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, invalid comment or CDATA format", pi->str, pi->s); return 0; } break; case '?': /* processing instruction */ pi->s++; read_instruction(pi); break; case '/': slash = pi->s; pi->s++; if (0 == (name = read_name_token(pi))) { attr_stack_cleanup(&attrs); return 0; } end = pi->s; next_non_white(pi); c = *pi->s; *end = '\0'; if (0 != strcmp(name, ename)) { attr_stack_cleanup(&attrs); if (TolerantEffort == pi->options->effort) { pi->pcb->end_element(pi, ename); return name; } else { set_error(&pi->err, "invalid format, elements overlap", pi->str, pi->s); return 0; } } if ('>' != c) { attr_stack_cleanup(&attrs); set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s); return 0; } if (first && start != slash - 1) { /* some white space between start and here so add as text */ *(slash - 1) = '\0'; pi->pcb->add_text(pi, start, 1); } pi->s++; pi->pcb->end_element(pi, ename); attr_stack_cleanup(&attrs); return 0; case '\0': attr_stack_cleanup(&attrs); if (TolerantEffort == pi->options->effort) { return 0; } else { set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; } default: first = 0; /* a child element */ // Child closed with mismatched name. 
if (0 != (name = read_element(pi))) { attr_stack_cleanup(&attrs); if (0 == strcmp(name, ename)) { pi->s++; pi->pcb->end_element(pi, ename); return 0; } else { // not the correct element yet pi->pcb->end_element(pi, ename); return name; } } else if (err_has(&pi->err)) { return 0; } break; } } else { /* read as TEXT */ pi->s = start; /*pi->s--; */ read_text(pi); /*read_reduced_text(pi); */ /* to exit read_text with no errors the next character must be < */ if ('/' == *(pi->s + 1) && 0 == strncmp(ename, pi->s + 2, elen) && '>' == *(pi->s + elen + 2)) { /* close tag after text so treat as a value */ pi->s += elen + 3; pi->pcb->end_element(pi, ename); attr_stack_cleanup(&attrs); return 0; } } } } attr_stack_cleanup(&attrs); return 0; } static void read_text(PInfo pi) { char buf[MAX_TEXT_LEN]; char *b = buf; char *alloc_buf = 0; char *end = b + sizeof(buf) - 2; char c; int done = 0; while (!done) { c = *pi->s++; switch(c) { case '<': done = 1; pi->s--; break; case '\0': set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return; default: if (end <= (b + (('&' == c) ? 
7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */ unsigned long size; if (0 == alloc_buf) { size = sizeof(buf) * 2; alloc_buf = ALLOC_N(char, size); memcpy(alloc_buf, buf, b - buf); b = alloc_buf + (b - buf); } else { unsigned long pos = b - alloc_buf; size = (end - alloc_buf) * 2; REALLOC_N(alloc_buf, char, size); b = alloc_buf + pos; } end = alloc_buf + size - 2; } if ('&' == c) { if (0 == (b = read_coded_chars(pi, b))) { return; } } else { *b++ = c; } break; } } *b = '\0'; if (0 != alloc_buf) { pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1))); xfree(alloc_buf); } else { pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1))); } } #if 0 static void read_reduced_text(PInfo pi) { char buf[MAX_TEXT_LEN]; char *b = buf; char *alloc_buf = 0; char *end = b + sizeof(buf) - 2; char c; int spc = 0; int done = 0; while (!done) { c = *pi->s++; switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': spc = 1; break; case '<': done = 1; pi->s--; break; case '\0': set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return; default: if (end <= (b + spc + (('&' == c) ? 
7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */ unsigned long size; if (0 == alloc_buf) { size = sizeof(buf) * 2; alloc_buf = ALLOC_N(char, size); memcpy(alloc_buf, buf, b - buf); b = alloc_buf + (b - buf); } else { unsigned long pos = b - alloc_buf; size = (end - alloc_buf) * 2; REALLOC(alloc_buf, char, size); b = alloc_buf + pos; } end = alloc_buf + size - 2; } if (spc) { *b++ = ' '; } spc = 0; if ('&' == c) { if (0 == (b = read_coded_chars(pi, b))) { return; } } else { *b++ = c; } break; } } *b = '\0'; if (0 != alloc_buf) { pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1))); xfree(alloc_buf); } else { pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1))); } } #endif static char* read_name_token(PInfo pi) { char *start; next_non_white(pi); start = pi->s; for (; 1; pi->s++) { switch (*pi->s) { case ' ': case '\t': case '\f': case '?': case '=': case '/': case '>': case '\n': case '\r': return start; case '\0': /* documents never terminate after a name token */ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; break; /* to avoid warnings */ default: break; } } return start; } static void read_cdata(PInfo pi) { char *start; char *end; start = pi->s; end = strstr(pi->s, "]]>"); if (end == 0) { set_error(&pi->err, "invalid format, CDATA not terminated", pi->str, pi->s); return; } *end = '\0'; pi->s = end + 3; if (0 != pi->pcb->add_cdata) { pi->pcb->add_cdata(pi, start, end - start); } } inline static void next_non_token(PInfo pi) { for (; 1; pi->s++) { switch(*pi->s) { case ' ': case '\t': case '\f': case '\n': case '\r': case '/': case '>': return; default: break; } } } /* Assume the value starts immediately and goes until the quote character is * reached again. Do not read the character after the terminating quote. 
*/ static char* read_quoted_value(PInfo pi) { char *value = 0; if ('"' == *pi->s || '\'' == *pi->s) { char term = *pi->s; pi->s++; /* skip quote character */ value = pi->s; for (; *pi->s != term; pi->s++) { if ('\0' == *pi->s) { set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; } } *pi->s = '\0'; /* terminate value */ pi->s++; /* move past quote */ } else if (StrictEffort == pi->options->effort) { set_error(&pi->err, "invalid format, expected a quote character", pi->str, pi->s); return 0; } else if (TolerantEffort == pi->options->effort) { value = pi->s; for (; 1; pi->s++) { switch (*pi->s) { case '\0': set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; case ' ': case '/': case '>': case '?': // for instructions case '\t': case '\n': case '\r': pi->last = *pi->s; *pi->s = '\0'; /* terminate value */ pi->s++; return value; default: break; } } } else { value = pi->s; next_white(pi); if ('\0' == *pi->s) { set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s); return 0; } *pi->s++ = '\0'; /* terminate value */ } return value; } static char* read_hex_uint64(char *b, uint64_t *up) { uint64_t u = 0; char c; for (; ';' != *b; b++) { c = *b; if ('0' <= c && c <= '9') { u = (u << 4) | (uint64_t)(c - '0'); } else if ('a' <= c && c <= 'f') { u = (u << 4) | (uint64_t)(c - 'a' + 10); } else if ('A' <= c && c <= 'F') { u = (u << 4) | (uint64_t)(c - 'A' + 10); } else { return 0; } } *up = u; return b; } static char* read_10_uint64(char *b, uint64_t *up) { uint64_t u = 0; char c; for (; ';' != *b; b++) { c = *b; if ('0' <= c && c <= '9') { u = (u * 10) + (uint64_t)(c - '0'); } else { return 0; } } *up = u; return b; } static char* read_coded_chars(PInfo pi, char *text) { char *b, buf[32]; char *end = buf + sizeof(buf) - 1; char *s; for (b = buf, s = pi->s; b < end; b++, s++) { *b = *s; if (';' == *s) { *(b + 1) = '\0'; s++; break; } } if (b > end) { *text++ = '&'; } else if ('#' 
== *buf) { uint64_t u = 0; b = buf + 1; if ('x' == *b || 'X' == *b) { b = read_hex_uint64(b + 1, &u); } else { b = read_10_uint64(b, &u); } if (0 == b) { *text++ = '&'; } else { if (u <= 0x000000000000007FULL) { *text++ = (char)u; #if HAS_PRIVATE_ENCODING } else if (ox_utf8_encoding == pi->options->rb_enc || 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) { #else } else if (ox_utf8_encoding == pi->options->rb_enc) { #endif text = ox_ucs_to_utf8_chars(text, u); #if HAS_PRIVATE_ENCODING } else if (Qnil == pi->options->rb_enc) { #else } else if (0 == pi->options->rb_enc) { #endif pi->options->rb_enc = ox_utf8_encoding; text = ox_ucs_to_utf8_chars(text, u); } else if (TolerantEffort == pi->options->effort) { *text++ = '&'; return text; } else if (u <= 0x00000000000000FFULL) { *text++ = (char)u; } else { /*set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */ set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); return 0; } pi->s = s; } } else if (0 == strcasecmp(buf, "nbsp;")) { pi->s = s; *text++ = ' '; } else if (0 == strcasecmp(buf, "lt;")) { pi->s = s; *text++ = '<'; } else if (0 == strcasecmp(buf, "gt;")) { pi->s = s; *text++ = '>'; } else if (0 == strcasecmp(buf, "amp;")) { pi->s = s; *text++ = '&'; } else if (0 == strcasecmp(buf, "quot;")) { pi->s = s; *text++ = '"'; } else if (0 == strcasecmp(buf, "apos;")) { pi->s = s; *text++ = '\''; } else { *text++ = '&'; } return text; } static int collapse_special(PInfo pi, char *str) { char *s = str; char *b = str; while ('\0' != *s) { if ('&' == *s) { int c; char *end; s++; if ('#' == *s) { uint64_t u = 0; char x; s++; if ('x' == *s || 'X' == *s) { x = *s; s++; end = read_hex_uint64(s, &u); } else { x = '\0'; end = read_10_uint64(s, &u); } if (0 == end) { if (TolerantEffort == pi->options->effort) { *b++ = '&'; *b++ = '#'; if ('\0' 
!= x) { *b++ = x; } continue; } return EDOM; } if (u <= 0x000000000000007FULL) { *b++ = (char)u; #if HAS_PRIVATE_ENCODING } else if (ox_utf8_encoding == pi->options->rb_enc || 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) { #else } else if (ox_utf8_encoding == pi->options->rb_enc) { #endif b = ox_ucs_to_utf8_chars(b, u); /* TBD support UTF-16 */ #if HAS_PRIVATE_ENCODING } else if (Qnil == pi->options->rb_enc) { #else } else if (0 == pi->options->rb_enc) { #endif pi->options->rb_enc = ox_utf8_encoding; b = ox_ucs_to_utf8_chars(b, u); } else { /* set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/ set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); return 0; } s = end + 1; } else { if (0 == strncasecmp(s, "lt;", 3)) { c = '<'; s += 3; } else if (0 == strncasecmp(s, "gt;", 3)) { c = '>'; s += 3; } else if (0 == strncasecmp(s, "amp;", 4)) { c = '&'; s += 4; } else if (0 == strncasecmp(s, "quot;", 5)) { c = '"'; s += 5; } else if (0 == strncasecmp(s, "apos;", 5)) { c = '\''; s += 5; } else if (TolerantEffort == pi->options->effort) { *b++ = '&'; continue; } else { c = '?'; while (';' != *s++) { if ('\0' == *s) { return EDOM; } } s++; } *b++ = (char)c; } } else { *b++ = *s++; } } *b = '\0'; return 0; } ox-2.1.1/ext/ox/sax_buf.c0000644000004100000410000001742212311544775015231 0ustar www-datawww-data/* sax_buf.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. 
* * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #if NEEDS_UIO #include #endif #include #include #include "ruby.h" #include "ox.h" #include "sax.h" #define BUF_PAD 4 static VALUE rescue_cb(VALUE rdr, VALUE err); static VALUE io_cb(VALUE rdr); static VALUE partial_io_cb(VALUE rdr); static int read_from_io(Buf buf); #ifndef JRUBY_RUBY static int read_from_fd(Buf buf); #endif static int read_from_io_partial(Buf buf); static int read_from_str(Buf buf); void ox_sax_buf_init(Buf buf, VALUE io) { if (ox_stringio_class == rb_obj_class(io)) { VALUE s = rb_funcall2(io, ox_string_id, 0, 0); buf->read_func = read_from_str; buf->in_str = StringValuePtr(s); } else if (rb_respond_to(io, ox_readpartial_id)) { #ifdef JRUBY_RUBY buf->read_func = read_from_io_partial; buf->io = io; #else VALUE rfd; if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) { buf->read_func = read_from_fd; buf->fd = FIX2INT(rfd); } else { buf->read_func = read_from_io_partial; buf->io = io; } #endif } else if (rb_respond_to(io, ox_read_id)) { #ifdef JRUBY_RUBY buf->read_func = read_from_io; buf->io = io; #else VALUE rfd; if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) { buf->read_func = read_from_fd; buf->fd = FIX2INT(rfd); } else { buf->read_func = read_from_io; buf->io = io; } #endif } else { rb_raise(ox_arg_error_class, "sax_parser io argument must respond to readpartial() or read().\n"); } buf->head = buf->base; *buf->head = '\0'; buf->end = buf->head + sizeof(buf->base) - BUF_PAD; buf->tail = buf->head; buf->read_end = buf->head; buf->pro = 0; buf->str = 0; buf->line = 1; buf->col = 0; buf->pro_line = 1; buf->pro_col = 0; buf->dr = 0; } int ox_sax_buf_read(Buf buf) { int err; size_t shift = 0; // if there is not much room to read into, shift or realloc a larger buffer. 
if (buf->head < buf->tail && 4096 > buf->end - buf->tail) { if (0 == buf->pro) { shift = buf->tail - buf->head; } else { shift = buf->pro - buf->head - 1; // leave one character so we cab backup one } if (0 >= shift) { /* no space left so allocate more */ char *old = buf->head; size_t size = buf->end - buf->head + BUF_PAD; if (buf->head == buf->base) { buf->head = ALLOC_N(char, size * 2); memcpy(buf->head, old, size); } else { REALLOC_N(buf->head, char, size * 2); } buf->end = buf->head + size * 2 - BUF_PAD; buf->tail = buf->head + (buf->tail - old); buf->read_end = buf->head + (buf->read_end - old); if (0 != buf->pro) { buf->pro = buf->head + (buf->pro - old); } if (0 != buf->str) { buf->str = buf->head + (buf->str - old); } } else { memmove(buf->head, buf->head + shift, buf->read_end - (buf->head + shift)); buf->tail -= shift; buf->read_end -= shift; if (0 != buf->pro) { buf->pro -= shift; } if (0 != buf->str) { buf->str -= shift; } } } err = buf->read_func(buf); *buf->read_end = '\0'; return err; } static VALUE rescue_cb(VALUE rbuf, VALUE err) { #ifndef JRUBY_RUBY /* JRuby seems to play by a different set if rules. It passes in an Fixnum * instead of an error like other Rubies. For now assume all errors are * EOF and deal with the results further down the line. 
*/ #if (defined(RUBINIUS_RUBY) || (1 == RUBY_VERSION_MAJOR && 8 == RUBY_VERSION_MINOR)) if (rb_obj_class(err) != rb_eTypeError) { #else if (rb_obj_class(err) != rb_eEOFError) { #endif Buf buf = (Buf)rbuf; //ox_sax_drive_cleanup(buf->dr); called after exiting protect rb_raise(err, "at line %d, column %d\n", buf->line, buf->col); } #endif return Qfalse; } static VALUE partial_io_cb(VALUE rbuf) { Buf buf = (Buf)rbuf; VALUE args[1]; VALUE rstr; char *str; size_t cnt; args[0] = ULONG2NUM(buf->end - buf->tail); rstr = rb_funcall2(buf->io, ox_readpartial_id, 1, args); str = StringValuePtr(rstr); cnt = strlen(str); //printf("*** read %lu bytes, str: '%s'\n", cnt, str); strcpy(buf->tail, str); buf->read_end = buf->tail + cnt; return Qtrue; } static VALUE io_cb(VALUE rbuf) { Buf buf = (Buf)rbuf; VALUE args[1]; VALUE rstr; char *str; size_t cnt; args[0] = ULONG2NUM(buf->end - buf->tail); rstr = rb_funcall2(buf->io, ox_read_id, 1, args); str = StringValuePtr(rstr); cnt = strlen(str); /*printf("*** read %lu bytes, str: '%s'\n", cnt, str); */ strcpy(buf->tail, str); buf->read_end = buf->tail + cnt; return Qtrue; } static int read_from_io_partial(Buf buf) { return (Qfalse == rb_rescue(partial_io_cb, (VALUE)buf, rescue_cb, (VALUE)buf)); } static int read_from_io(Buf buf) { return (Qfalse == rb_rescue(io_cb, (VALUE)buf, rescue_cb, (VALUE)buf)); } #ifndef JRUBY_RUBY static int read_from_fd(Buf buf) { ssize_t cnt; size_t max = buf->end - buf->tail; cnt = read(buf->fd, buf->tail, max); if (cnt < 0) { ox_sax_drive_error(buf->dr, "failed to read from file"); return -1; } else if (0 != cnt) { buf->read_end = buf->tail + cnt; } return 0; } #endif static char* ox_stpncpy(char *dest, const char *src, size_t n) { size_t cnt = strlen(src) + 1; if (n < cnt) { cnt = n; } strncpy(dest, src, cnt); return dest + cnt - 1; } static int read_from_str(Buf buf) { size_t max = buf->end - buf->tail - 1; char *s; long cnt; if ('\0' == *buf->in_str) { /* done */ return -1; } s = ox_stpncpy(buf->tail, 
buf->in_str, max); *s = '\0'; cnt = s - buf->tail; buf->in_str += cnt; buf->read_end = buf->tail + cnt; return 0; } ox-2.1.1/ext/ox/base64.c0000644000004100000410000001073312311544775014664 0ustar www-datawww-data/* base64.c * Copyright (c) 2011, Peter Ohler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * - Neither the name of Peter Ohler nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include "base64.h" static char digits[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /* invalid or terminating characters are set to 'X' or \x58 */ static uchar s_digits[256] = "\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x3E\x58\x58\x58\x3F\ \x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x58\x58\x58\x58\x58\x58\ \x58\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\ \x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x58\x58\x58\x58\x58\ \x58\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\ \x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\ \x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58\x58"; void to_base64(const uchar *src, int len, char *b64) { const uchar *end3; int len3 = len % 3; uchar b1, b2, b3; end3 = src + (len - len3); while (src < end3) { b1 = *src++; b2 = *src++; b3 = *src++; *b64++ = digits[(uchar)(b1 >> 2)]; *b64++ = digits[(uchar)(((b1 & 0x03) << 4) | (b2 >> 4))]; *b64++ = digits[(uchar)(((b2 & 0x0F) << 2) | (b3 >> 6))]; *b64++ = digits[(uchar)(b3 & 0x3F)]; } if (1 == len3) { b1 = *src++; *b64++ = digits[b1 >> 2]; *b64++ = digits[(b1 & 0x03) << 4]; *b64++ = '='; *b64++ = '='; } else if (2 == len3) { b1 = *src++; b2 = *src++; *b64++ = digits[b1 >> 2]; *b64++ = digits[((b1 & 0x03) << 4) | (b2 >> 4)]; *b64++ = digits[(b2 & 0x0F) << 2]; *b64++ = '='; } *b64 = '\0'; } unsigned long 
b64_orig_size(const char *text) { const char *start = text; unsigned long size = 0; if ('\0' != *text) { for (; 0 != *text; text++) { } size = (text - start) * 3 / 4; text--; if ('=' == *text) { size--; text--; if ('=' == *text) { size--; } } } return size; } void from_base64(const char *b64, uchar *str) { uchar b0, b1, b2, b3; while (1) { if ('X' == (b0 = s_digits[(uchar)*b64++])) { break; } if ('X' == (b1 = s_digits[(uchar)*b64++])) { break; } *str++ = (b0 << 2) | ((b1 >> 4) & 0x03); if ('X' == (b2 = s_digits[(uchar)*b64++])) { break; } *str++ = (b1 << 4) | ((b2 >> 2) & 0x0F); if ('X' == (b3 = s_digits[(uchar)*b64++])) { break; } *str++ = (b2 << 6) | b3; } *str = '\0'; } ox-2.1.1/README.md0000644000004100000410000001674512311544775013476 0ustar www-datawww-data# Ox gem A fast XML parser and Object marshaller as a Ruby gem. ## Installation gem install ox ## Documentation *Documentation*: http://www.ohler.com/ox ## Source *GitHub* *repo*: https://github.com/ohler55/ox *RubyGems* *repo*: https://rubygems.org/gems/ox ## Follow @oxgem on Twitter [Follow @peterohler on Twitter](http://twitter.com/#!/peterohler) for announcements and news about the Ox gem. ## Build Status [![Build Status](https://secure.travis-ci.org/ohler55/ox.png?branch=master)](http://travis-ci.org/ohler55/ox) ## Links of Interest [Ruby XML Gem Comparison](http://www.ohler.com/dev/xml_with_ruby/xml_with_ruby.html) for a perfomance comparison between Ox, Nokogiri, and LibXML. [Fast Ruby XML Serialization](http://www.ohler.com/dev/ruby_object_xml_serialization/ruby_object_xml_serialization.html) to see how Ox can be used as a faster replacement for Marshal. *Fast JSON parser and marshaller on RubyGems*: https://rubygems.org/gems/oj *Fast JSON parser and marshaller on GitHub*: https://rubygems.org/gems/oj ## Release Notes ### Release 2.1.1 - Worked around a module reset and clear that occurs on some Rubies. ### Release 2.1.0 - Thanks to jfontan Ox now includes support for XMLRPC. 
## Description

Optimized XML (Ox), as the name implies, was written to provide speed-optimized XML and now HTML handling. It was designed to be an alternative to Nokogiri and other Ruby XML parsers in generic XML parsing and as an alternative to Marshal for Object serialization.

Unlike some other Ruby XML parsers, Ox is self contained. Ox uses nothing other than standard C libraries so version issues with libXml are not an issue.

Marshal uses a binary format for serializing Objects. That binary format changes with releases, making Marshal-dumped Objects incompatible between some versions. The use of a binary format makes debugging message streams or file contents next to impossible unless the same version of Ruby, and only Ruby, is used for inspecting the serialized Object. Ox on the other hand uses human readable XML. Ox also includes options that allow a strict mode, a tolerant mode, or a mode that automatically defines missing classes.

It is possible to write an XML serialization gem with Nokogiri or other XML parsers but writing such a package in Ruby results in a module significantly slower than Marshal. This is what triggered the start of Ox development.

Ox handles XML documents in three ways. It is a generic XML parser and writer, a fast Object / XML marshaller, and a stream SAX parser. Ox was written for speed as a replacement for Nokogiri, Ruby LibXML, and for Marshal.

As an XML parser it is 2 or more times faster than Nokogiri and as a generic XML writer it is as much as 20 times faster than Nokogiri. Of course different files may result in slightly different times.

As an Object serializer Ox is up to 6 times faster than the standard Ruby Marshal.dump() and up to 3 times faster than Marshal.load().

The SAX-like stream parser is 40 times faster than Nokogiri and more than 13 times faster than LibXML when validating a file with minimal Ruby callbacks. Unlike Nokogiri and LibXML, Ox can be tuned to use only the SAX callbacks that are of interest to the caller.
(See the perf_sax.rb file for an example.)

Ox is compatible with Ruby 1.8.7, 1.9.2, 2.0.0, JRuby, and RBX.

### Object Dump Sample:

```ruby
require 'ox'

class Sample
  attr_accessor :a, :b, :c

  def initialize(a, b, c)
    @a = a
    @b = b
    @c = c
  end
end

# Create Object
obj = Sample.new(1, "bee", ['x', :y, 7.0])
# Now dump the Object to an XML String.
xml = Ox.dump(obj)
# Convert the object back into a Sample Object.
obj2 = Ox.parse_obj(xml)
```

### Generic XML Writing and Parsing:

```ruby
require 'ox'

doc = Ox::Document.new(:version => '1.0')

top = Ox::Element.new('top')
top[:name] = 'sample'
doc << top

mid = Ox::Element.new('middle')
mid[:name] = 'second'
top << mid

bot = Ox::Element.new('bottom')
bot[:name] = 'third'
mid << bot

xml = Ox.dump(doc)

# xml =
# <top name="sample">
#   <middle name="second">
#     <bottom name="third"/>
#   </middle>
# </top>

doc2 = Ox.parse(xml)
puts "Same? #{doc == doc2}"
# true
```

### SAX XML Parsing:

```ruby
require 'stringio'
require 'ox'

class Sample < ::Ox::Sax
  def start_element(name); puts "start: #{name}"; end
  def end_element(name); puts "end: #{name}"; end
  def attr(name, value); puts " #{name} => #{value}"; end
  def text(value); puts "text #{value}"; end
end

io = StringIO.new(%{
<top name="sample">
  <middle name="second">
    <bottom name="third"/>
  </middle>
</top>
})

handler = Sample.new()
Ox.sax_parse(handler, io)
# outputs
# start: top
#  name => sample
# start: middle
#  name => second
# start: bottom
#  name => third
# end: bottom
# end: middle
# end: top
```

### Yielding results immediately while SAX XML Parsing:

```ruby
require 'stringio'
require 'ox'

class Yielder < ::Ox::Sax
  def initialize(block); @yield_to = block; end
  def start_element(name); @yield_to.call(name); end
end

io = StringIO.new(%{
<top name="sample">
  <middle name="second">
    <bottom name="third"/>
  </middle>
</top>
})

proc = Proc.new { |name| puts name }
handler = Yielder.new(proc)
puts "before parse"
Ox.sax_parse(handler, io)
puts "after parse"
# outputs
# before parse
# top
# middle
# bottom
# after parse
```

### Object XML format

The XML format used for Object encoding follows the structure of the Object. Each XML element is encoded so that the XML element name is a type indicator.
Attributes of the element provide additional information such as the Class if relevant, the Object attribute name, and Object ID if necessary. The type indicator map is: - **a** => `Array` - **b** => `Base64` - **c** => `Class` - **f** => `Float` - **g** => `Regexp` - **h** => `Hash` - **i** => `Fixnum` - **j** => `Bignum` - **l** => `Rational` - **m** => `Symbol` - **n** => `FalseClass` - **o** => `Object` - **p** => `Ref` - **r** => `Range` - **s** => `String` - **t** => `Time` - **u** => `Struct` - **v** => `Complex` - **x** => `Raw` - **y** => `TrueClass` - **z** => `NilClass` If the type is an Object (type 'o'), then an attribute named 'c' should be set with the full Class name including the Module names. If the XML element represents an Object then a sub-element is included for each attribute of the Object. An XML element attribute 'a' is set with a value that is the name of the Ruby Object attribute. In all cases, except for the Exception attribute hack, the attribute names begin with an @ character. (Exceptions are strange in that the attributes of the Exception Class are not named with an @ prefix. This is a hack since it has to be done in C and cannot be done through the interpreter.) Values are encoded as the text portion of an element or in the sub-elements of the principal element. For example, a Fixnum is encoded as: ```xml <i>123</i> ``` An Array has sub-elements and is encoded similarly to this example: ```xml <a> <i>1</i> <s>abc</s> </a> ``` A Hash is encoded with an even number of elements where the first element is the key and the second is the value. This is repeated for each entry in the Hash. An example of the encoding of { 1 => 'one', 2 => 'two' } is: ```xml <h> <i>1</i> <s>one</s> <i>2</i> <s>two</s> </h> ``` Strings with characters not allowed in XML are base64 encoded and will be converted back into a String when loaded. Ox supports circular references where attributes of one Object can refer to an Object that refers back to the first Object. 
When this option is used, an Object ID is added to each XML Object element as the value of the 'i' attribute.