htree-0.8/0000755000175000017500000000000012260053056011475 5ustar jonasjonashtree-0.8/htree/0000755000175000017500000000000011747021106012604 5ustar jonasjonashtree-0.8/htree/fstr.rb0000644000175000017500000000117311747021106014111 0ustar jonasjonasrequire 'htree/modules' module HTree # :stopdoc: def HTree.with_frozen_string_hash if Thread.current[:htree_frozen_string_hash] yield else begin Thread.current[:htree_frozen_string_hash] = {} yield ensure Thread.current[:htree_frozen_string_hash] = nil end end end def HTree.frozen_string(str) if h = Thread.current[:htree_frozen_string_hash] if s = h[str] s else str = str.dup.freeze unless str.frozen? h[str] = str end else str = str.dup.freeze unless str.frozen? str end end # :startdoc: end htree-0.8/htree/tag.rb0000644000175000017500000000626311747021106013713 0ustar jonasjonasrequire 'htree/raw_string' require 'htree/text' require 'htree/scan' # for Pat::Name and Pat::Nmtoken require 'htree/context' require 'htree/name' require 'htree/fstr' module HTree # :stopdoc: class STag def initialize(name, attributes=[], inherited_context=DefaultContext) init_raw_string # normalize xml declaration name and attribute value. attributes = attributes.map {|aname, val| if !(Name === aname) && /\A(?:#{Pat::Name}?\{.*\})?#{Pat::Nmtoken}\z/o !~ aname raise HTree::Error, "invalid attribute name: #{aname.inspect}" end if !(Name === aname) && /\Axmlns(?:\z|:)/ =~ aname aname = Name.parse_attribute_name(aname, nil) end val = val.to_node if HTree::Location === val val = Text.new(val) unless Text === val [aname, val] } @inherited_context = inherited_context @xmlns_decls = {} # validate namespace consistency of given Name objects. if Name === name @xmlns_decls[name.namespace_prefix] = name.namespace_uri end attributes.each {|aname, text| next unless Name === aname next if aname.xmlns? if aname.namespace_prefix && aname.namespace_uri if @xmlns_decls.include? 
aname.namespace_prefix if @xmlns_decls[aname.namespace_prefix] != aname.namespace_uri raise ArgumentError, "inconsistent namespace use: #{aname.namespace_prefix} is used as #{@xmlns_decls[aname.namespace_prefix]} and #{aname.namespace_uri}" end else @xmlns_decls[aname.namespace_prefix] = aname.namespace_uri end end } attributes.each {|aname, text| next unless Name === aname next unless aname.xmlns? next if @xmlns_decls.include? aname.local_name if aname.local_name @xmlns_decls[aname.local_name] = text.to_s else uri = text.to_s @xmlns_decls[nil] = uri end } @context = make_context(@inherited_context) if Name === name @name = name else @name = Name.parse_element_name(name, @context) end @attributes = attributes.map {|aname, text| aname = Name.parse_attribute_name(aname, @context) unless Name === aname if !aname.namespace_prefix && !aname.namespace_uri.empty? # xxx: should recover error? raise HTree::Error, "global attribute without namespace prefix: #{aname.inspect}" end [aname, text] } @attributes.freeze end attr_reader :attributes, :inherited_context, :context def element_name @name end def make_context(inherited_context) inherited_context.subst_namespaces(@xmlns_decls) end def each_namespace_attribute @xmlns_decls.each {|name, uri| yield name, uri } nil end def each_attribute @attributes.each {|name, text| next if name.xmlns? yield name, text } nil end end class ETag def initialize(qualified_name) init_raw_string @qualified_name = HTree.frozen_string(qualified_name) end attr_reader :qualified_name end # :startdoc: end htree-0.8/htree/container.rb0000644000175000017500000000021711747021106015113 0ustar jonasjonasrequire 'htree/modules' module HTree::Container # +children+ returns children nodes as an array. 
def children @children.dup end end htree-0.8/htree/elem.rb0000644000175000017500000001645711747021106014070 0ustar jonasjonasrequire 'htree/modules' require 'htree/tag' require 'htree/context' require 'htree/container' module HTree class Elem # :stopdoc: class << self alias new! new end # :startdoc: # The first argument _name_ should be an instance of String or HTree::Name. # # The rest of arguments should be a sequence of follows. # [Hash object] used as attributes. # [String object] specified string is converted to HTree::Text. # [HTree::Node object] used as a child. # [HTree::Doc object] # used as children. # It is expanded except HTree::XMLDecl and HTree::DocType objects. # [Array of String, HTree::Node, HTree::Doc] used as children. # [HTree::Context object] # used as as context which represents XML namespaces. # This should apper once at most. # # HTree::Location object is accepted just as HTree::Node. # # If the rest arguments consists only # Hash and HTree::Context, empty element is created. # # p HTree::Elem.new("e").empty_element? # => true # p HTree::Elem.new("e", []).empty_element? # => false def Elem.new(name, *args) attrs = [] children = [] context = nil args.each {|arg| arg = arg.to_node if HTree::Location === arg case arg when Context raise ArgumentError, "multiple context" if context context = arg when Hash arg.each {|k, v| attrs << [k, v] } when Array arg.each {|a| a = a.to_node if HTree::Location === a case a when HTree::Doc children.concat(a.children.reject {|c| HTree::XMLDecl === c || HTree::DocType === c }) when HTree::Node children << a when String children << Text.new(a) else raise TypeError, "unexpected argument: #{arg.inspect}" end } when HTree::Doc children.concat(arg.children.reject {|c| HTree::XMLDecl === c || HTree::DocType === c }) when HTree::Node children << arg when String children << Text.new(arg) else raise TypeError, "unexpected argument: #{arg.inspect}" end } context ||= DefaultContext if children.empty? && args.all? 
{|arg| Hash === arg || Context === arg } children = nil end new!(STag.new(name, attrs, context), children) end def initialize(stag, children=nil, etag=nil) # :notnew: unless stag.class == STag raise TypeError, "HTree::STag expected: #{stag.inspect}" end unless !children || children.all? {|c| c.kind_of?(HTree::Node) and !c.kind_of?(HTree::Doc) } unacceptable = children.reject {|c| c.kind_of?(HTree::Node) and !c.kind_of?(HTree::Doc) } unacceptable = unacceptable.map {|uc| uc.inspect }.join(', ') raise TypeError, "Unacceptable element child: #{unacceptable}" end unless !etag || etag.class == ETag raise TypeError, "HTree::ETag expected: #{etag.inspect}" end @stag = stag @children = (children ? children.dup : []).freeze @empty = children == nil && etag == nil @etag = etag end def context; @stag.context end # +element_name+ returns the name of the element name as a Name object. def element_name() @stag.element_name end def empty_element? @empty end def each_attribute(&block) # :yields: attr_name, attr_text @stag.each_attribute(&block) end def get_subnode_internal(index) # :nodoc: case index when String name = Name.parse_attribute_name(index, DefaultContext) update_attribute_hash[name.universal_name] when Name update_attribute_hash[index.universal_name] when Integer if index < 0 || @children.length <= index nil else @children[index] end else raise TypeError, "invalid index: #{index.inspect}" end end # call-seq: # elem.subst_subnode(pairs) -> elem # # The argument _pairs_ should be a hash or an assocs. # # The key of pairs should be one of following. # [HTree::Name or String object] attribute name. # [Integer object] child index. # # The value of pairs should be one of follows. # [HTree::Node object] specified object is used as is. # [String object] specified string is converted to HTree::Text # [Array of above] specified HTree::Node and String is used in that order. # [nil] delete corresponding node. 
# # e = HTree('').root # p e.subst_subnode({0=>HTree(''), 2=>HTree('')}) # p e.subst_subnode([[0, HTree('')], [2,HTree('')]]) # # => # {elem {emptyelem } {emptyelem } {emptyelem }} # {elem {emptyelem } {emptyelem } {emptyelem }} # def subst_subnode(pairs) hash = {} pairs.each {|index, value| case index when Name, Integer when String index = Name.parse_attribute_name(index, DefaultContext) else raise TypeError, "invalid index: #{index.inspect}" end value = value.to_node if HTree::Location === value case value when Node value = [value] when String value = [value] when Array value = value.dup when nil value = [] else raise TypeError, "invalid value: #{value.inspect}" end value.map! {|v| v = v.to_node if HTree::Location === v case v when Node v when String Text.new(v) else raise TypeError, "invalid value: #{v.inspect}" end } if !hash.include?(index) hash[index] = [] end hash[index].concat value } attrs = [] @stag.attributes.each {|k, v| if hash.include? k v = hash[k] if !v.empty? attrs << {k=>Text.concat(*v)} end hash.delete k else attrs << {k=>v} end } hash.keys.each {|k| if Name === k v = hash[k] if !v.empty? attrs << {k=>Text.concat(*v)} end hash.delete k end } children_left = [] children = @children.dup children_right = [] hash.keys.sort.each {|index| value = hash[index] if index < 0 children_left << value elsif children.length <= index children_right << value else children[index] = value end } children = [children_left, children, children_right].flatten if children.empty? 
&& @empty Elem.new( @stag.element_name, @stag.context, *attrs) else Elem.new( @stag.element_name, @stag.context, children, *attrs) end end end module Elem::Trav private def update_attribute_hash if defined?(@attribute_hash) @attribute_hash else h = {} each_attribute {|name, text| h[name.universal_name] = text } @attribute_hash = h end end end end htree-0.8/htree/text.rb0000644000175000017500000000622311747021106014120 0ustar jonasjonasrequire 'htree/modules' require 'htree/raw_string' require 'htree/htmlinfo' require 'htree/encoder' require 'htree/fstr' if !"".respond_to?(:encode) require 'iconv' end module HTree class Text # :stopdoc: class << self alias new_internal new end # :startdoc: def Text.new(arg) arg = arg.to_node if HTree::Location === arg if Text === arg new_internal arg.rcdata, arg.normalized_rcdata elsif String === arg arg2 = arg.gsub(/&/, '&') arg = arg2.freeze if arg != arg2 new_internal arg else raise TypeError, "cannot initialize Text with #{arg.inspect}" end end def initialize(rcdata, normalized_rcdata=internal_normalize(rcdata)) # :notnew: init_raw_string @rcdata = rcdata && HTree.frozen_string(rcdata) @normalized_rcdata = @rcdata == normalized_rcdata ? @rcdata : normalized_rcdata end attr_reader :rcdata, :normalized_rcdata def internal_normalize(rcdata) # - character references are decoded as much as possible. # - undecodable character references are converted to decimal numeric character refereces. result = rcdata.gsub(/&(?:#([0-9]+)|#x([0-9a-fA-F]+)|([A-Za-z][A-Za-z0-9]*));/o) {|s| u = nil if $1 u = $1.to_i elsif $2 u = $2.hex elsif $3 u = NamedCharacters[$3] end if !u || u < 0 || 0x7fffffff < u '?' elsif u == 38 # '&' character. '&' elsif u <= 0x7f [u].pack("C") else us = [u].pack("U") if us.respond_to? 
:encode us.encode(Encoder.internal_charset, :xml=>:text) else begin Iconv.conv(Encoder.internal_charset, 'UTF-8', us) rescue Iconv::Failure "&##{u};" end end end } HTree.frozen_string(result) end private :internal_normalize # HTree::Text#to_s converts the text to a string. # - character references are decoded as much as possible. # - undecodable character reference are converted to `?' character. def to_s @normalized_rcdata.gsub(/&(?:#([0-9]+));/o) {|s| u = $1.to_i if 0 <= u && u <= 0x7f [u].pack("C") else '?' end } end def empty? @normalized_rcdata.empty? end def strip rcdata = @normalized_rcdata.dup rcdata.sub!(/\A(?:\s| )+/, '') rcdata.sub!(/(?:\s| )+\z/, '') if rcdata == @normalized_rcdata self else rcdata.freeze Text.new_internal(rcdata, rcdata) end end # HTree::Text.concat returns a text which is concatenation of arguments. # # An argument should be one of follows. # - String # - HTree::Text # - HTree::Location which points HTree::Text def Text.concat(*args) rcdata = '' args.each {|arg| arg = arg.to_node if HTree::Location === arg if Text === arg rcdata << arg.rcdata else rcdata << arg.gsub(/&/, '&') end } new_internal rcdata end end end htree-0.8/htree/context.rb0000644000175000017500000000371011747021106014616 0ustar jonasjonasmodule HTree class Context # :stopdoc: DefaultNamespaces = {'xml'=>'http://www.w3.org/XML/1998/namespace'} DefaultNamespaces.default = "" DefaultNamespaces.freeze # :startdoc: # The optional argument `namespaces' should be a hash or nil. # HTree::DefaultNamespaces is used if nil is specified. # # If it is a hash, its key should be nil or a string. # nil means default namespace. # The string means some prefix which must not be empty. # # The hash value should be a string. # The empty string "" means unbound namespace. def initialize(namespaces=nil) namespaces ||= DefaultNamespaces namespaces.each_pair {|k, v| check_namespace_prefix(k) check_namespace_uri(v) } namespaces = namespaces.dup.freeze unless namespaces.frozen? 
@namespaces = namespaces end attr_reader :namespaces # return a namespace URI corresponding to _prefix_. # It returns nil if _prefix_ is not defined. def namespace_uri(prefix) @namespaces[prefix] end # generate a new Context object which namespaces are substituted by # a hash _declared_namespaces_. def subst_namespaces(declared_namespaces) namespaces = @namespaces.dup declared_namespaces.each {|k, v| check_namespace_prefix(k) check_namespace_uri(v) namespaces[k] = v } if namespaces == @namespaces self else Context.new(namespaces) end end private def check_namespace_prefix(k) unless (String === k && !k.empty?) || k == nil raise ArgumentError, "invalid namespace prefix: #{k.inspect}" end end def check_namespace_uri(v) unless String === v raise ArgumentError, "invalid namespace URI: #{v.inspect}" end end end # :stopdoc: DefaultContext = Context.new HTMLContext = DefaultContext.subst_namespaces(nil=>"http://www.w3.org/1999/xhtml") # :startdoc: end htree-0.8/htree/equality.rb0000644000175000017500000001127711747021106014776 0ustar jonasjonasrequire 'htree/doc' require 'htree/elem' require 'htree/leaf' require 'htree/tag' require 'htree/raw_string' require 'htree/context' module HTree # compare tree structures. def ==(other) check_equality(self, other, :usual_equal_object) end alias eql? == # hash value for the tree structure. def hash return @hash_code if defined? @hash_code @hash_code = usual_equal_object.hash end # :stopdoc: def usual_equal_object return @usual_equal_object if defined? @usual_equal_object @usual_equal_object = make_usual_equal_object end def make_usual_equal_object raise NotImplementedError end def exact_equal_object return @exact_equal_object if defined? 
@exact_equal_object @exact_equal_object = make_exact_equal_object end def make_exact_equal_object raise NotImplementedError end def exact_equal?(other) check_equality(self, other, :exact_equal_object) end def check_equality(obj1, obj2, equal_object_method) return false unless obj1.class == obj2.class if obj1.class == Array return false unless obj1.length == obj2.length obj1.each_with_index {|c1, i| return false unless c1.class == obj2[i].class } obj1.each_with_index {|c1, i| return false unless check_equality(c1, obj2[i], equal_object_method) } true elsif obj1.respond_to? equal_object_method o1 = obj1.send(equal_object_method) o2 = obj2.send(equal_object_method) check_equality(o1, o2, equal_object_method) else obj1 == obj2 end end class Doc alias exact_equal_object children alias usual_equal_object children end class Elem def make_exact_equal_object [@stag, @children, @empty, @etag] end def make_usual_equal_object [@stag, @children] end end class Name def make_exact_equal_object [@namespace_prefix, @namespace_uri, @local_name] end def make_usual_equal_object xmlns? ? @local_name : [@namespace_uri, @local_name] end end module Util module_function def cmp_with_nil(a, b) if a == nil if b == nil 0 else -1 end else if b == nil 1 else a <=> b end end end end class Context def make_exact_equal_object @namespaces.keys.sort {|prefix1, prefix2| Util.cmp_with_nil(prefix1, prefix2) }.map {|prefix| [prefix, @namespaces[prefix]] } end # make_usual_equal_object is not used through STag#make_usual_equal_object # NotImplementedError is suitable? alias make_usual_equal_object make_exact_equal_object end class STag def make_exact_equal_object [@raw_string, @name, @attributes.sort {|(n1, _), (n2, _)| Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? || Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? || Util.cmp_with_nil(n1.local_name, n2.local_name) }, @inherited_context ] end def make_usual_equal_object [@name, @attributes.find_all {|n,t| !n.xmlns? 
}.sort {|(n1, _), (n2, _)| Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? || Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? || Util.cmp_with_nil(n1.local_name, n2.local_name) } ] end end class ETag def make_exact_equal_object [@raw_string, @qualified_name] end alias usual_equal_object qualified_name end class Text def make_exact_equal_object [@raw_string, @rcdata] end def make_usual_equal_object @normalized_rcdata end end class XMLDecl def make_exact_equal_object [@raw_string, @version, @encoding, @standalone] end def make_usual_equal_object [@version, @encoding, @standalone] end end class DocType def make_exact_equal_object [@raw_string, @root_element_name, @system_identifier, @public_identifier] end def make_usual_equal_object [@root_element_name, @system_identifier, @public_identifier] end end class ProcIns def make_exact_equal_object [@raw_string, @target, @content] end def make_usual_equal_object [@target, @content] end end class Comment def make_exact_equal_object [@raw_string, @content] end alias usual_equal_object content end class BogusETag def make_exact_equal_object [@etag] end alias usual_equal_object make_exact_equal_object end class Location def make_exact_equal_object [@parent, @index, @node] end alias usual_equal_object make_exact_equal_object end # :startdoc: end htree-0.8/htree/regexp-util.rb0000644000175000017500000000112511747021106015375 0ustar jonasjonasclass Regexp def disable_capture re = '' charclass_p = false self.source.scan(/\\.|[^\\\(\[\]]+|\(\?|\(|\[|\]/m) {|s| case s when '(' if charclass_p re << '(' else re << '(?:' end when '[' charclass_p = true re << s when ']' charclass_p = false re << s else re << s end } if re.respond_to? 
:force_encoding re.force_encoding(self.encoding) Regexp.new(re, self.options) else Regexp.new(re, self.options, self.kcode) end end end htree-0.8/htree/template.rb0000644000175000017500000007534611747021106014763 0ustar jonasjonas# = Template Engine # # The htree template engine converts HTML and some data to HTML or XML. # # == Template Method Summary # # - HTree.expand_template(template_pathname) -> $stdout # - HTree.expand_template(template_pathname, obj) -> $stdout # - HTree.expand_template(template_pathname, obj, out) -> out # - HTree.expand_template(template_pathname, obj, out, encoding) -> out # # - HTree.expand_template{template_string} -> $stdout # - HTree.expand_template(out) {template_string} -> out # - HTree.expand_template(out, encoding) {template_string} -> out # # - HTree.compile_template(template_string) -> Module # - HTree{template_string} -> HTree::Doc # # Note that the following method, HTree(), is not a template method. # # - HTree(html_string) -> HTree::Doc # # == Template Directives. # # A template directive is described as a special HTML attribute which name # begins with underscore. # # The template directives are listed as follows. # # - name="expr">content # - dummy-content # - expr # - dummy-content # - expr # - then-content # - content # - content # - dummy-content # - body # # === Template Semantics # # - attribute substitution # - name="expr">content # # \_attr_name is used for a dynamic attribute. # # # -> # # It is expanded to name="content". # The content is generated by evaluating _expr_. # Usually you don't need to care escaping: &, <, > and " are automatically escaped. # If you need to output character references, # the value of _expr_ should be an object which have a +rcdata+ method such as an HTree::Text. # If the value has a +rcdata+ method, # it is called and the result is used as the content with escaping <, > and ". # # \_attr_name can be used multiple times in single element. 
# # - text substitution # - dummy-content # - expr # # _text substitutes the content of the element by the string # evaluated from _expr_. # _expr_ is described in the attribute value or the content of the element. # # If a result of _expr_ have &, < and/or >, they are automatically escaped. # If you need to output character references, # the value of _expr_ should be an object which have a +rcdata+ method such as an HTree::Text. # If the value has a +rcdata+ method, # it is called and the result is used as the content with escaping < and >. # # If the element is span or div, and there is no other attributes, # no tags are produced. # # dummy-content # -> ... # # - tree substitution # - dummy-content # - expr # # _tree substitutes the content of the element by the htree object # evaluated from _expr_. # _expr_ is described in the attribute value or the content of the element. # # If the element is span or div, and there is no other attributes, # no tags are produced. # # dummy-content # -> ... # # - conditional # - then-content # - then-content # # _if is used for conditional. # # If expr is evaluated to true, it expands as follows # regardless of existence of _else. # # then-content # -> then-content # # If expr is evaluated to false, it expands using _else. # If _else is not given, it expands to empty. # If _else is given, it expands as follows. # # then-content # -> then-content # -> see _call for further expansion. # # It is expanded to then-content if _expr_ is evaluated to # a true value. # Otherwise, it is replaced by other template specified by _else attribute. # If _else attribute is not given, it just replaced by empty. # # - iteration # - content # - content # # _iter and _iter_content is used for iteration. # _iter iterates the element itself but _iter_content iterates the content. # # # -> ... # # # -> ... # # expr.meth(args) specifies iterator method call. # It is actually called with a block. # The block have block parameters vars. 
# vars must be variables separated by comma. # # - template call # - dummy-content # - dummy-content # # _call is used to expand a template function. # The template function is defined by _template. # # ... # ... # -> ... # # A local template can be called as follows: # # HTree.expand_template{<<'End'} # [ruby-talk:nnn] # Ruby 1.8.0 is released at . # Ruby 1.8.1 is released at . # End # # mod should be the result of HTree.compile_template. # # M = HTree.compile_template(<<'End') # [ruby-talk:nnn] # End # HTree.expand_template{<<'End'} # # Ruby 1.8.0 is released at . # Ruby 1.8.1 is released at . # # End # # The module can included. # In such case, the template function can be called without mod. # prefix. # # include HTree.compile_template(<<'End') # [ruby-talk:nnn] # End # HTree.expand_template{<<'End'} # # Ruby 1.8.0 is released at . # Ruby 1.8.1 is released at . # # End # # - template definition # - body # # _template defines a template function which is usable by _call. # # When a template is compiled to a module by HTree.compile_template, # the module have a module function for each template function # defined by outermost _template attribute. # # === White Space Handling # # The htree template engine strips whitespace text nodes in a template # except under HTML pre element. # # For example the white space text node between two spans in following template is stripped. # # -> "ab" # # Character entity references are not stripped. # # -> "a b" # # Text nodes generated by _text is not stripped. # # -> "a b" # # == HTML and XML # # The htree template engine outputs HTML or XML. # # If a template has no XML declaration and the top element is HTML, # the result is HTML. # Otherwise the result is XML. # # They differs as follows. # # - XML declaration is (re-)generated for XML. # - empty elements ends with a slash for XML. # - script and style element is escaped for XML. 
# # == Design Decision on Design/Logic Separation # # HTree template engine doesn't force you to separate design and logic. # Any logic (Ruby code) can be embedded in design (HTML). # # However the template engine cares the separation by logic refactorings. # The logic is easy to move between a template and an application. # For example, following tangled template # # tmpl.html: # # # dummy # # ... # # # app.rb: # HTree.expand_template('tmpl.html', obj) # # can be refactored as follows. # # tmpl.html: # # # dummy # # ... # # # app.rb: # def obj.title # very-complex-ruby-code # end # HTree.expand_template('tmpl.html', obj) # # In general, any expression in a template can be refactored to an application # by extracting it as a method. # In JSP, this is difficult especially for a code fragment of an iteration. # # Also HTree encourages to separate business logic (Ruby code in an application) # and presentation logic (Ruby code in a template). # For example, presentation logic to color table rows stripe # can be embedded in a template. # It doesn't need to tangle an application. # module HTree # :stopdoc: EmptyBindingObject = Object.new # :startdoc: end # :stopdoc: htree_emptybindingobject = HTree::EmptyBindingObject def htree_emptybindingobject.empty_binding binding end # :startdoc: require 'htree/parse' require 'htree/gencode' require 'htree/equality' require 'htree/traverse' # call-seq: # HTree.expand_template(template_pathname, obj=Object.new, out=$stdout, encoding=internal_encoding) -> out # HTree.expand_template(out=$stdout, encoding=internal_encoding) { template_string } -> out # # HTree.expand_template expands a template. # # The arguments should be specified as follows. # All argument except pathname are optional. # # - HTree.expand_template(pathname, obj, out, encoding) -> out # - HTree.expand_template(out, encoding) {template_string} -> out # # The template is specified by a file or a string. 
# If a block is not given, the first argument represent a template pathname. # Otherwise, the block is yielded and its value is interpreted as a template # string. # So it can be called as follows in simplest case. # # - HTree.expand_template(template_pathname) # - HTree.expand_template{template_string} # # Ruby expressions in the template file specified by _template_pathname_ are # evaluated in the context of the optional second argument obj as follows. # I.e. the pseudo variable self in the expressions is bound to obj. # # HTree.expand_template(template_pathname, obj) # # Ruby expressions in the template_string are evaluated # in the context of the caller of HTree.expand_template. # (binding information is specified by the block.) # I.e. they can access local variables etc. # We recommend to specify template_string as a literal string without # interpolation because dynamically generated string may break lexical scope. # # HTree.expand_template has two more optional arguments: # out, encoding. # # out specifies output target. # It should have << method: IO and String for example. # If it is not specified, $stdout is used. # If it has a method charset=, it is called to set the minimal charset # of the result before << is called. # # encoding specifies output character encoding. # If it is not specified, internal encoding is used. # # HTree.expand_template returns out or $stdout if out is not # specified. # def HTree.expand_template(*args, &block) if block template = block.call binding = block.binding else pathname = args.fetch(0) { raise ArgumentError, "pathname not given" } args.shift obj = args.fetch(0) { Object.new } args.shift if pathname.respond_to? :read template = pathname.read.untaint if template.respond_to? :charset if template.respond_to? 
:encode template = template.encode(HTree::Encoder.internal_charset, template.charset) else template = Iconv.conv(HTree::Encoder.internal_charset, template.charset, template) end end else template = File.read(pathname).untaint end Thread.current[:htree_expand_template_obj] = obj binding = eval(<<-'End', Thread.current[:htree_expand_template_obj].class.class_eval <<-'EE' Thread.current[:htree_expand_template_obj].instance_eval { binding } EE End HTree::EmptyBindingObject.empty_binding, "(eval:#{__FILE__}:#{__LINE__})") Thread.current[:htree_expand_template_obj] = nil end out = args.shift || $stdout encoding = args.shift || HTree::Encoder.internal_charset if !args.empty? raise ArgumentError, "wrong number of arguments" end HTree::TemplateCompiler.new.expand_template(template, out, encoding, binding) end # call-seq: # HTree(html_string) -> doc # HTree{template_string} -> doc # # HTree(html_string) parses html_string. # HTree{template_string} parses template_string and expand it as a template. # Ruby expressions in template_string is evaluated in the scope of the caller. # # HTree() and HTree{} returns a tree as an instance of HTree::Doc. def HTree(html_string=nil, &block) if block_given? raise ArgumentError, "both argument and block given." if html_string template = block.call HTree.parse(HTree::TemplateCompiler.new.expand_template(template, '', HTree::Encoder.internal_charset, block.binding)) else HTree.parse(html_string) end end # call-seq: # HTree.compile_template(template_string) -> module # # HTree.compile_template(template_string) compiles # template_string as a template. # # HTree.compile_template returns a module. # The module has module functions for each templates defined in # template_string. # The returned module can be used for +include+. # # M = HTree.compile_template(<<'End') #

# 's birthday is . #

# End # M.birthday('Ruby', Time.utc(1993, 2, 24)).display_xml # #

Ruby's birthday is February 24th 1993.

# # The module function takes arguments specifies by a _template # attribute and returns a tree represented as HTree::Node. # def HTree.compile_template(template_string) code = HTree::TemplateCompiler.new.compile_template(template_string) Thread.current[:htree_compile_template_code] = code mod = eval(<<-'End', eval(Thread.current[:htree_compile_template_code]) End HTree::EmptyBindingObject.empty_binding, "(eval:#{__FILE__}:#{__LINE__})") Thread.current[:htree_compile_template_code] = nil mod end # :stopdoc: class HTree::TemplateCompiler IGNORABLE_ELEMENTS = { 'span' => true, 'div' => true, '{http://www.w3.org/1999/xhtml}span' => true, '{http://www.w3.org/1999/xhtml}div' => true, } def initialize @gensym_id = 0 end def gensym(suffix='') @gensym_id += 1 "g#{@gensym_id}#{suffix}" end def parse_template(template) strip_whitespaces(HTree.parse(template)) end WhiteSpacePreservingElements = { '{http://www.w3.org/1999/xhtml}pre' => true } def strip_whitespaces(template) case template when HTree::Doc HTree::Doc.new(*template.children.map {|c| strip_whitespaces(c) }.compact) when HTree::Elem, HTree::Doc return template if WhiteSpacePreservingElements[template.name] subst = {} template.children.each_with_index {|c, i| subst[i] = strip_whitespaces(c) } template.subst_subnode(subst) when HTree::Text if /\A[ \t\r\n]*\z/ =~ template.rcdata nil else template end else template end end def template_is_html(template) template.each_child {|c| return false if c.xmldecl? return true if c.elem? && c.element_name.namespace_uri == 'http://www.w3.org/1999/xhtml' } false end def expand_template(template, out, encoding, binding) template = parse_template(template) is_html = template_is_html(template) outvar = gensym('out') contextvar = gensym('top_context') code = '' code << "#{outvar} = HTree::Encoder.new(#{encoding.dump})\n" code << "#{outvar}.html_output = true\n" if is_html code << "#{contextvar} = #{is_html ? 
"HTree::HTMLContext" : "HTree::DefaultContext"}\n" code << compile_body(outvar, contextvar, template, false) code << "[#{outvar}.#{is_html ? "finish" : "finish_with_xmldecl"}, #{outvar}.minimal_charset]\n" #puts code; STDOUT.flush result, minimal_charset = eval(code, binding, "(eval:#{__FILE__}:#{__LINE__})") out.charset = minimal_charset if out.respond_to? :charset= out << result out end def compile_template(src) srcdoc = parse_template(src) templates = [] extract_templates(srcdoc, templates, true) methods = [] templates.each {|name_args, node| methods << compile_global_template(name_args, node) } <<"End" require 'htree/encoder' require 'htree/context' Module.new.module_eval <<'EE' module_function #{methods.join('').chomp} self EE End end def template_attribute?(name) /\A_/ =~ name.local_name end def extract_templates(node, templates, is_toplevel) case node when HTree::Doc subst = {} node.children.each_with_index {|n, i| subst[i] = extract_templates(n, templates, is_toplevel) } node.subst_subnode(subst) when HTree::Elem ht_attrs, = node.attributes.partition {|name, text| template_attribute? name } if ht_attrs.empty? subst = {} node.children.each_with_index {|n, i| subst[i] = extract_templates(n, templates, is_toplevel) } node.subst_subnode(subst) else ht_attrs.each {|htname, text| if htname.universal_name == '_template' name_fargs = text.to_s templates << [name_fargs, node.subst_subnode('_template' => nil)] return nil end } if is_toplevel raise HTree::Error, "unexpected template attributes in toplevel: #{ht_attrs.inspect}" else node end end else node end end ID_PAT = /[a-z][a-z0-9_]*/ NAME_FARGS_PAT = /(#{ID_PAT})(?:\(\s*(|#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)\))?/ def compile_global_template(name_fargs, node) unless /\A#{NAME_FARGS_PAT}\z/o =~ name_fargs raise HTree::Error, "invalid template declaration: #{name_fargs}" end name = $1 fargs = $2 ? 
$2.scan(ID_PAT) : [] outvar = gensym('out') contextvar = gensym('top_context') args2 = [outvar, contextvar, *fargs] <<"End" def #{name}(#{fargs.join(',')}) HTree.parse(_xml_#{name}(#{fargs.join(',')})) end def _xml_#{name}(#{fargs.join(',')}) #{outvar} = HTree::Encoder.new(HTree::Encoder.internal_charset) #{contextvar} = HTree::DefaultContext _ht_#{name}(#{args2.join(',')}) #{outvar}.finish end def _ht_#{name}(#{args2.join(',')}) #{compile_body(outvar, contextvar, node, false)}\ end public :_ht_#{name} End end def compile_local_template(name_fargs, node, local_templates) unless /\A#{NAME_FARGS_PAT}\z/o =~ name_fargs raise HTree::Error, "invalid template declaration: #{name_fargs}" end name = $1 fargs = $2 ? $2.scan(ID_PAT) : [] outvar = gensym('out') contextvar = gensym('top_context') args2 = [outvar, contextvar, *fargs] <<"End" #{name} = lambda {|#{args2.join(',')}| #{compile_body(outvar, contextvar, node, false, local_templates)}\ } End end def compile_body(outvar, contextvar, node, is_toplevel, local_templates={}) if node.elem? && IGNORABLE_ELEMENTS[node.name] && node.attributes.empty? node = TemplateNode.new(node.children) else node = TemplateNode.new(node) end generate_logic_node([:content], node, local_templates).generate_xml_output_code(outvar, contextvar) end def compile_node(node, local_templates) case node when HTree::Doc TemplateNode.new(node.children.map {|n| compile_node(n, local_templates) }) when HTree::Elem ht_attrs = node.attributes.find_all {|name, text| template_attribute? name } ht_attrs = ht_attrs.sort_by {|htname, text| htname.universal_name } ignore_tag = false unless ht_attrs.empty? attr_mod = {} ht_attrs.each {|htname, text| attr_mod[htname] = nil if /\A_attr_/ =~ htname.local_name attr_mod[TemplateAttrName.new(htname.namespace_prefix, htname.namespace_uri, $')] = text end } ht_attrs.reject! 
{|htname, text| /\A_attr_/ =~ htname.local_name } node = node.subst_subnode(attr_mod) ignore_tag = IGNORABLE_ELEMENTS[node.name] && node.attributes.empty? end ht_names = ht_attrs.map {|htname, text| htname.universal_name } ht_vals = ht_attrs.map {|htname, text| text.to_s } case ht_names when [] generate_logic_node([:tag, [:content]], node, local_templates) when ['_text'] # or expr if ht_vals[0] != '_text' # xxx: attribute value is really omitted? expr = ht_vals[0] else children = node.children if children.length != 1 raise HTree::Error, "_text expression has #{children.length} nodes" end if !children[0].text? raise HTree::Error, "_text expression is not text: #{children[0].class}" end expr = children[0].to_s end if ignore_tag && /\A\s*'((?:[^'\\]|\\.)*)'\s*\z/m =~ expr # if expr is just a constant string literal, use it as a literal text. # This saves dynamic evaluation of # xxx: handle "..." as well if it has no #{}. HTree::Text.new($1.gsub(/\\(.)/m, '\1')) else generate_logic_node(compile_dynamic_text(ignore_tag, expr), node, local_templates) end when ['_tree'] # or expr if ht_vals[0] != '_tree' # xxx: attribute value is really omitted? expr = ht_vals[0] else children = node.children if children.length != 1 raise HTree::Error, "_tree expression has #{children.length} nodes" end if !children[0].text? raise HTree::Error, "_tree expression is not text: #{children[0].class}" end expr = children[0].to_s end generate_logic_node(compile_dynamic_tree(ignore_tag, expr), node, local_templates) when ['_if'] # ... generate_logic_node(compile_if(ignore_tag, ht_vals[0], nil), node, local_templates) when ['_else', '_if'] # ... generate_logic_node(compile_if(ignore_tag, ht_vals[1], ht_vals[0]), node, local_templates) when ['_call'] # generate_logic_node(compile_call(ignore_tag, ht_vals[0]), node, local_templates) when ['_iter'] # ... generate_logic_node(compile_iter(ignore_tag, ht_vals[0]), node, local_templates) when ['_iter_content'] # ... 
generate_logic_node(compile_iter_content(ignore_tag, ht_vals[0]), node, local_templates) else raise HTree::Error, "unexpected template attributes: #{ht_attrs.inspect}" end else return node end end def valid_syntax?(code) begin eval("BEGIN {return true}\n#{code.untaint}") rescue SyntaxError raise SyntaxError, "invalid code: #{code}" end end def check_syntax(code) unless valid_syntax?(code) raise HTree::Error, "invalid ruby code: #{code}" end end def compile_dynamic_text(ignore_tag, expr) check_syntax(expr) logic = [:text, expr] logic = [:tag, logic] unless ignore_tag logic end def compile_dynamic_tree(ignore_tag, expr) check_syntax(expr) logic = [:tree, expr] logic = [:tag, logic] unless ignore_tag logic end def compile_if(ignore_tag, expr, else_call) check_syntax(expr) then_logic = [:content] unless ignore_tag then_logic = [:tag, then_logic] end else_logic = nil if else_call else_logic = compile_call(true, else_call) end [:if, expr, then_logic, else_logic] end def split_args(spec) return spec, '' if /\)\z/ !~ spec i = spec.length - 1 nest = 0 begin raise HTree::Error, "unmatched paren: #{spec}" if i < 0 case spec[i] when ?\) nest += 1 when ?\( nest -= 1 end i -= 1 end while nest != 0 i += 1 return spec[0, i], spec[(i+1)...-1] end def compile_call(ignore_tag, spec) # spec : [recv.]meth[(args)] spec = spec.strip spec, args = split_args(spec) unless /#{ID_PAT}\z/o =~ spec raise HTree::Error, "invalid _call: #{spec}" end meth = $& spec = $` if /\A\s*\z/ =~ spec recv = nil elsif /\A\s*(.*)\.\z/ =~ spec recv = $1 else raise HTree::Error, "invalid _call: #{spec}" end if recv check_syntax(recv) check_syntax("#{recv}.#{meth}(#{args})") end check_syntax("#{meth}(#{args})") [:call, recv, meth, args] end def compile_iter(ignore_tag, spec) # spec: ... spec = spec.strip unless %r{\s*//\s*(#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)?\z}o =~ spec raise HTree::Error, "invalid block arguments for _iter: #{spec}" end call = $`.strip fargs = $1 ? 
$1.strip : '' check_syntax("#{call} {|#{fargs}| }") logic = [:content] unless ignore_tag logic = [:tag, logic] end [:iter, call, fargs, logic] end def compile_iter_content(ignore_tag, spec) # spec: ... spec = spec.strip unless %r{\s*//\s*(#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)?\z}o =~ spec raise HTree::Error, "invalid block arguments for _iter: #{spec}" end call = $`.strip fargs = $1 ? $1.strip : '' check_syntax("#{call} {|#{fargs}| }") logic = [:content] logic = [:iter, call, fargs, logic] unless ignore_tag logic = [:tag, logic] end logic end def generate_logic_node(logic, node, local_templates) # logic ::= [:if, expr, then_logic, else_logic] # | [:iter, call, fargs, logic] # | [:tag, logic] # | [:text, expr] # | [:tree, expr] # | [:call, expr, meth, args] # | [:content] # | [:empty] case logic.first when :empty nil when :content subtemplates = [] children = [] node.children.each {|c| children << extract_templates(c, subtemplates, false) } if subtemplates.empty? TemplateNode.new(node.children.map {|n| compile_node(n, local_templates) }) else local_templates = local_templates.dup decl = '' subtemplates.each {|sub_name_args, sub_node| sub_name = sub_name_args[ID_PAT] local_templates[sub_name] = sub_name decl << "#{sub_name} = " } decl << "nil\n" defs = [] subtemplates.each {|sub_name_args, sub_node| defs << lambda {|out, context| out.output_logic_line compile_local_template(sub_name_args, sub_node, local_templates) } } TemplateNode.new( lambda {|out, context| out.output_logic_line decl }, defs, children.map {|n| compile_node(n, local_templates) } ) end when :text _, expr = logic TemplateNode.new(lambda {|out, context| out.output_dynamic_text expr }) when :tree _, expr = logic TemplateNode.new(lambda {|out, context| out.output_dynamic_tree expr, make_context_expr(out, context) }) when :tag _, rest_logic = logic if rest_logic == [:content] && node.empty_element? 
node else subst = {} node.children.each_index {|i| subst[i] = nil } subst[0] = TemplateNode.new(generate_logic_node(rest_logic, node, local_templates)) node.subst_subnode(subst) end when :if _, expr, then_logic, else_logic = logic children = [ lambda {|out, context| out.output_logic_line "if (#{expr})" }, generate_logic_node(then_logic, node, local_templates) ] if else_logic children.concat [ lambda {|out, context| out.output_logic_line "else" }, generate_logic_node(else_logic, node, local_templates) ] end children << lambda {|out, context| out.output_logic_line "end" } TemplateNode.new(*children) when :iter _, call, fargs, rest_logic = logic TemplateNode.new( lambda {|out, context| out.output_logic_line "#{call} {|#{fargs}|" }, generate_logic_node(rest_logic, node, local_templates), lambda {|out, context| out.output_logic_line "}" } ) when :call _, recv, meth, args = logic TemplateNode.new( lambda {|out, context| as = [out.outvar, ", ", make_context_expr(out, context)] unless args.empty? as << ", " << args end if recv out.output_logic_line "(#{recv})._ht_#{meth}(#{as.join('')})" elsif local_templates.include? meth out.output_logic_line "#{meth}.call(#{as.join('')})" else out.output_logic_line "_ht_#{meth}(#{as.join('')})" end } ) else raise Exception, "[bug] invalid logic: #{logic.inspect}" end end def make_context_expr(out, context) ns = context.namespaces.reject {|k, v| HTree::Context::DefaultNamespaces[k] == v } if ns.empty? result = out.contextvar else result = "#{out.contextvar}.subst_namespaces(" sep = '' ns.each {|k, v| result << sep << (k ? k.dump : "nil") << '=>' << v.dump sep = ', ' } result << ")" end result end class TemplateNode include HTree::Node def initialize(*children) @children = children.flatten.compact end attr_reader :children def output(out, context) @children.each {|c| if c.respond_to? 
:call c.call(out, context) else c.output(out, context) end } end end class TemplateAttrName < HTree::Name def output_attribute(text, out, context) output(out, context) out.output_string '="' out.output_dynamic_attvalue(text.to_s) out.output_string '"' end end end # :startdoc: htree-0.8/htree/rexml.rb0000644000175000017500000000624211747021106014264 0ustar jonasjonas# = REXML Tree Generator # # HTree::Node#to_rexml is used for converting HTree to REXML. # # == Method Summary # # - HTree::Node#to_rexml -> REXML::Child # # == Example # # HTree.parse(...).to_rexml #=> REXML::Document # # == Comparison between HTree and REXML. # # - HTree parser is permissive HTML/XML parser. # REXML parser is strict XML parser. # HTree is recommended if you need to parse realworld HTML. # REXML is recommended if you need strict error checking. # - HTree object is immutable. # REXML object is mutable. # REXML should be used if you need modification. # require 'htree/modules' require 'htree/output' # HTree::DocType#generate_content module HTree module Node # convert to REXML tree. 
def to_rexml require 'rexml/document' to_rexml_internal(nil, DefaultContext) end end # :stopdoc: class Doc def to_rexml_internal(parent, context) raise ArgumentError, "parent must be nil" if parent != nil result = REXML::Document.new self.children.each {|c| c.to_rexml_internal(result, context) } result end end class Elem def to_rexml_internal(parent, context) ename = self.element_name ns_decl = {} if context.namespace_uri(ename.namespace_prefix) != ename.namespace_uri ns_decl[ename.namespace_prefix] = ename.namespace_uri end if ename.namespace_prefix result = REXML::Element.new("#{ename.namespace_prefix}:#{ename.local_name}", parent) else result = REXML::Element.new(ename.local_name, parent) end self.each_attribute {|aname, atext| if aname.namespace_prefix if context.namespace_uri(aname.namespace_prefix) != aname.namespace_uri ns_decl[aname.namespace_prefix] = aname.namespace_uri end result.add_attribute("#{aname.namespace_prefix}:#{aname.local_name}", atext.to_s) else result.add_attribute(aname.local_name, atext.to_s) end } ns_decl.each {|k, v| if k result.add_namespace(k, v) else result.add_namespace(v) end } context = context.subst_namespaces(ns_decl) self.children.each {|c| c.to_rexml_internal(result, context) } result end end class Text def to_rexml_internal(parent, context) rcdata = self.rcdata.gsub(/[<>]/) { Encoder::ChRef[$&] } REXML::Text.new(rcdata, true, parent, true) end end class XMLDecl def to_rexml_internal(parent, context) r = REXML::XMLDecl.new(self.version, self.encoding, self.standalone) parent << r if parent r end end class DocType def to_rexml_internal(parent, context) REXML::DocType.new([self.root_element_name, self.generate_content], parent) end end class ProcIns def to_rexml_internal(parent, context) r = REXML::Instruction.new(self.target, self.content) parent << r if parent r end end class Comment def to_rexml_internal(parent, context) REXML::Comment.new(self.content, parent) end end class BogusETag def to_rexml_internal(parent, context) 
nil end end # :startdoc: end htree-0.8/htree/modules.rb0000644000175000017500000000407411747021106014606 0ustar jonasjonasmodule HTree class Name; include HTree end class Context; include HTree end # :stopdoc: module Tag; include HTree end class STag; include Tag end class ETag; include Tag end # :startdoc: module Node; include HTree end module Container; include Node end class Doc; include Container end class Elem; include Container end module Leaf; include Node end class Text; include Leaf end class XMLDecl; include Leaf end class DocType; include Leaf end class ProcIns; include Leaf end class Comment; include Leaf end class BogusETag; include Leaf end module Traverse end module Container::Trav; include Traverse end module Leaf::Trav; include Traverse end class Doc; module Trav; include Container::Trav end; include Trav end class Elem; module Trav; include Container::Trav end; include Trav end class Text; module Trav; include Leaf::Trav end; include Trav end class XMLDecl; module Trav; include Leaf::Trav end; include Trav end class DocType; module Trav; include Leaf::Trav end; include Trav end class ProcIns; module Trav; include Leaf::Trav end; include Trav end class Comment; module Trav; include Leaf::Trav end; include Trav end class BogusETag; module Trav; include Leaf::Trav end; include Trav end class Location; include HTree end module Container::Loc end module Leaf::Loc end class Doc; class Loc < Location; include Trav, Container::Loc end end class Elem; class Loc < Location; include Trav, Container::Loc end end class Text; class Loc < Location; include Trav, Leaf::Loc end end class XMLDecl; class Loc < Location; include Trav, Leaf::Loc end end class DocType; class Loc < Location; include Trav, Leaf::Loc end end class ProcIns; class Loc < Location; include Trav, Leaf::Loc end end class Comment; class Loc < Location; include Trav, Leaf::Loc end end class BogusETag; class Loc < Location; include Trav, Leaf::Loc end end class Error < StandardError; end end 
htree-0.8/htree/htmlinfo.rb0000644000175000017500000011056611747021106014762 0ustar jonasjonasmodule HTree # The code below is auto-generated. Don't edit manually. # :stopdoc: NamedCharacters = {"AElig"=>198, "Aacute"=>193, "Acirc"=>194, "Agrave"=>192, "Alpha"=>913, "Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199, "Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201, "Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203, "Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921, "Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925, "OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937, "Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934, "Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931, "THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219, "Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221, "Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180, "aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38, "and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776, "atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166, "bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162, "chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169, "crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224, "darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247, "eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195, "ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240, "euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704, "frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947, "ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829, "hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236, "image"=>8465, 
"infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191, "isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955, "lang"=>9001, "laquo"=>171, "larr"=>8592, "lceil"=>8968, "ldquo"=>8220, "le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206, "lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212, "micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711, "nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713, "nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244, "oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959, "oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248, "otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706, "permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982, "plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733, "psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002, "raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476, "reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250, "rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167, "shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824, "sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178, "sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756, "theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732, "times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593, "ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965, "uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165, "yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204} NamedCharactersPattern = 
/\A(?-mix:AElig|Aacute|Acirc|Agrave|Alpha|Aring|Atilde|Auml|Beta|Ccedil|Chi|Dagger|Delta|ETH|Eacute|Ecirc|Egrave|Epsilon|Eta|Euml|Gamma|Iacute|Icirc|Igrave|Iota|Iuml|Kappa|Lambda|Mu|Ntilde|Nu|OElig|Oacute|Ocirc|Ograve|Omega|Omicron|Oslash|Otilde|Ouml|Phi|Pi|Prime|Psi|Rho|Scaron|Sigma|THORN|Tau|Theta|Uacute|Ucirc|Ugrave|Upsilon|Uuml|Xi|Yacute|Yuml|Zeta|aacute|acirc|acute|aelig|agrave|alefsym|alpha|amp|and|ang|apos|aring|asymp|atilde|auml|bdquo|beta|brvbar|bull|cap|ccedil|cedil|cent|chi|circ|clubs|cong|copy|crarr|cup|curren|dArr|dagger|darr|deg|delta|diams|divide|eacute|ecirc|egrave|empty|emsp|ensp|epsilon|equiv|eta|eth|euml|euro|exist|fnof|forall|frac12|frac14|frac34|frasl|gamma|ge|gt|hArr|harr|hearts|hellip|iacute|icirc|iexcl|igrave|image|infin|int|iota|iquest|isin|iuml|kappa|lArr|lambda|lang|laquo|larr|lceil|ldquo|le|lfloor|lowast|loz|lrm|lsaquo|lsquo|lt|macr|mdash|micro|middot|minus|mu|nabla|nbsp|ndash|ne|ni|not|notin|nsub|ntilde|nu|oacute|ocirc|oelig|ograve|oline|omega|omicron|oplus|or|ordf|ordm|oslash|otilde|otimes|ouml|para|part|permil|perp|phi|pi|piv|plusmn|pound|prime|prod|prop|psi|quot|rArr|radic|rang|raquo|rarr|rceil|rdquo|real|reg|rfloor|rho|rlm|rsaquo|rsquo|sbquo|scaron|sdot|sect|shy|sigma|sigmaf|sim|spades|sub|sube|sum|sup|sup1|sup2|sup3|supe|szlig|tau|there4|theta|thetasym|thinsp|thorn|tilde|times|trade|uArr|uacute|uarr|ucirc|ugrave|uml|upsih|upsilon|uuml|weierp|xi|yacute|yen|yuml|zeta|zwj|zwnj)\z/ ElementContent = {"h6"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "object"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", 
"h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "dl"=>["dd", "dt"], "p"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "acronym"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "code"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "ul"=>["li"], "tt"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "label"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "form"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", 
"cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "q"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "thead"=>["tr"], "area"=>:EMPTY, "td"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "title"=>[], "dir"=>["li"], "s"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "ol"=>["li"], "hr"=>:EMPTY, "applet"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q", "s", "samp", "script", 
"select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "table"=>["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"], "legend"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "cite"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "a"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "html"=> ["a", "abbr", "acronym", "address", "applet", "b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "title", "tt", "u", "ul", "var"], "u"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "blockquote"=> ["a", "abbr", 
"acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "center"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "b"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "base"=>:EMPTY, "th"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "link"=>:EMPTY, "var"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", 
"label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "samp"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "div"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "textarea"=>[], "pre"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "head"=>["base", "isindex", "title"], "span"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "br"=>:EMPTY, "script"=>:CDATA, "noframes"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", 
"noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "style"=>:CDATA, "meta"=>:EMPTY, "dt"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "option"=>[], "kbd"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "big"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "tfoot"=>["tr"], "sup"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "bdo"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "isindex"=>:EMPTY, "dfn"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", 
"map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "fieldset"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "legend", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "em"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "font"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "tbody"=>["tr"], "noscript"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "li"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", 
"form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "col"=>:EMPTY, "small"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "dd"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "i"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "menu"=>["li"], "strong"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "basefont"=>:EMPTY, "img"=>:EMPTY, "optgroup"=>["option"], "map"=> ["address", "area", "blockquote", "center", "dir", "div", "dl", "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", 
"noframes", "noscript", "ol", "p", "pre", "table", "ul"], "h1"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "address"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "p", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "sub"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "param"=>:EMPTY, "input"=>:EMPTY, "h2"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "abbr"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "h3"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "strike"=> ["a", "abbr", 
"acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "body"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "ins"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "button"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "h4"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", 
"s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "select"=>["optgroup", "option"], "caption"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "colgroup"=>["col"], "tr"=>["td", "th"], "del"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"], "h5"=> ["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br", "button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd", "label", "map", "object", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "textarea", "tt", "u", "var"], "iframe"=> ["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big", "blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span", "strike", "strong", "sub", "sup", "table", "textarea", "tt", "u", "ul", "var"]} ElementInclusions = {"head"=>["link", "meta", "object", "script", "style"], "body"=>["del", "ins"]} ElementExclusions = {"button"=> ["a", "button", "fieldset", "form", 
"iframe", "input", "isindex", "label", "select", "textarea"], "a"=>["a"], "dir"=> ["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes", "noscript", "ol", "p", "pre", "table", "ul"], "title"=>["link", "meta", "object", "script", "style"], "pre"=> ["applet", "basefont", "big", "font", "img", "object", "small", "sub", "sup"], "form"=>["form"], "menu"=> ["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes", "noscript", "ol", "p", "pre", "table", "ul"], "label"=>["label"]} OmittedAttrName = {"h6"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "object"=> {"bottom"=>"align", "declare"=>"declare", "left"=>"align", "ltr"=>"dir", "middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"}, "dl"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"}, "p"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "acronym"=>{"ltr"=>"dir", "rtl"=>"dir"}, "code"=>{"ltr"=>"dir", "rtl"=>"dir"}, "ul"=> {"circle"=>"type", "compact"=>"compact", "disc"=>"type", "ltr"=>"dir", "rtl"=>"dir", "square"=>"type"}, "tt"=>{"ltr"=>"dir", "rtl"=>"dir"}, "label"=>{"ltr"=>"dir", "rtl"=>"dir"}, "form"=>{"get"=>"method", "ltr"=>"dir", "post"=>"method", "rtl"=>"dir"}, "q"=>{"ltr"=>"dir", "rtl"=>"dir"}, "thead"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "area"=> {"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "nohref"=>"nohref", "poly"=>"shape", "rect"=>"shape", "rtl"=>"dir"}, "td"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align", "left"=>"align", 
"ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap", "right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir", "top"=>"valign"}, "title"=>{"ltr"=>"dir", "rtl"=>"dir"}, "dir"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"}, "s"=>{"ltr"=>"dir", "rtl"=>"dir"}, "ol"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"}, "hr"=> {"center"=>"align", "left"=>"align", "ltr"=>"dir", "noshade"=>"noshade", "right"=>"align", "rtl"=>"dir"}, "applet"=> {"bottom"=>"align", "left"=>"align", "middle"=>"align", "right"=>"align", "top"=>"align"}, "table"=> {"above"=>"frame", "all"=>"rules", "below"=>"frame", "border"=>"frame", "box"=>"frame", "center"=>"align", "cols"=>"rules", "groups"=>"rules", "hsides"=>"frame", "left"=>"align", "lhs"=>"frame", "ltr"=>"dir", "none"=>"rules", "rhs"=>"frame", "right"=>"align", "rows"=>"rules", "rtl"=>"dir", "void"=>"frame", "vsides"=>"frame"}, "legend"=> {"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir", "top"=>"align"}, "cite"=>{"ltr"=>"dir", "rtl"=>"dir"}, "a"=> {"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "poly"=>"shape", "rect"=>"shape", "rtl"=>"dir"}, "html"=>{"ltr"=>"dir", "rtl"=>"dir"}, "u"=>{"ltr"=>"dir", "rtl"=>"dir"}, "blockquote"=>{"ltr"=>"dir", "rtl"=>"dir"}, "center"=>{"ltr"=>"dir", "rtl"=>"dir"}, "b"=>{"ltr"=>"dir", "rtl"=>"dir"}, "th"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap", "right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir", "top"=>"valign"}, "link"=>{"ltr"=>"dir", "rtl"=>"dir"}, "var"=>{"ltr"=>"dir", "rtl"=>"dir"}, "samp"=>{"ltr"=>"dir", "rtl"=>"dir"}, "div"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "textarea"=> {"disabled"=>"disabled", "ltr"=>"dir", "readonly"=>"readonly", "rtl"=>"dir"}, "pre"=>{"ltr"=>"dir", "rtl"=>"dir"}, 
"head"=>{"ltr"=>"dir", "rtl"=>"dir"}, "span"=>{"ltr"=>"dir", "rtl"=>"dir"}, "br"=>{"all"=>"clear", "left"=>"clear", "none"=>"clear", "right"=>"clear"}, "script"=>{"defer"=>"defer"}, "noframes"=>{"ltr"=>"dir", "rtl"=>"dir"}, "style"=>{"ltr"=>"dir", "rtl"=>"dir"}, "meta"=>{"ltr"=>"dir", "rtl"=>"dir"}, "dt"=>{"ltr"=>"dir", "rtl"=>"dir"}, "option"=> {"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir", "selected"=>"selected"}, "kbd"=>{"ltr"=>"dir", "rtl"=>"dir"}, "big"=>{"ltr"=>"dir", "rtl"=>"dir"}, "tfoot"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "sup"=>{"ltr"=>"dir", "rtl"=>"dir"}, "bdo"=>{"ltr"=>"dir", "rtl"=>"dir"}, "isindex"=>{"ltr"=>"dir", "rtl"=>"dir"}, "dfn"=>{"ltr"=>"dir", "rtl"=>"dir"}, "fieldset"=>{"ltr"=>"dir", "rtl"=>"dir"}, "em"=>{"ltr"=>"dir", "rtl"=>"dir"}, "font"=>{"ltr"=>"dir", "rtl"=>"dir"}, "tbody"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "noscript"=>{"ltr"=>"dir", "rtl"=>"dir"}, "li"=>{"ltr"=>"dir", "rtl"=>"dir"}, "col"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "small"=>{"ltr"=>"dir", "rtl"=>"dir"}, "dd"=>{"ltr"=>"dir", "rtl"=>"dir"}, "i"=>{"ltr"=>"dir", "rtl"=>"dir"}, "menu"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"}, "strong"=>{"ltr"=>"dir", "rtl"=>"dir"}, "img"=> {"bottom"=>"align", "ismap"=>"ismap", "left"=>"align", "ltr"=>"dir", "middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"}, "optgroup"=>{"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir"}, "map"=>{"ltr"=>"dir", "rtl"=>"dir"}, "address"=>{"ltr"=>"dir", "rtl"=>"dir"}, "h1"=> {"center"=>"align", 
"justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "sub"=>{"ltr"=>"dir", "rtl"=>"dir"}, "param"=>{"data"=>"valuetype", "object"=>"valuetype", "ref"=>"valuetype"}, "input"=> {"bottom"=>"align", "button"=>"type", "checkbox"=>"type", "checked"=>"checked", "disabled"=>"disabled", "file"=>"type", "hidden"=>"type", "image"=>"type", "ismap"=>"ismap", "left"=>"align", "ltr"=>"dir", "middle"=>"align", "password"=>"type", "radio"=>"type", "readonly"=>"readonly", "reset"=>"type", "right"=>"align", "rtl"=>"dir", "submit"=>"type", "text"=>"type", "top"=>"align"}, "h2"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "abbr"=>{"ltr"=>"dir", "rtl"=>"dir"}, "h3"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "strike"=>{"ltr"=>"dir", "rtl"=>"dir"}, "body"=>{"ltr"=>"dir", "rtl"=>"dir"}, "ins"=>{"ltr"=>"dir", "rtl"=>"dir"}, "button"=> {"button"=>"type", "disabled"=>"disabled", "ltr"=>"dir", "reset"=>"type", "rtl"=>"dir", "submit"=>"type"}, "h4"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "select"=> {"disabled"=>"disabled", "ltr"=>"dir", "multiple"=>"multiple", "rtl"=>"dir"}, "caption"=> {"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir", "top"=>"align"}, "colgroup"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "tr"=> {"baseline"=>"valign", "bottom"=>"valign", "center"=>"align", "char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"}, "del"=>{"ltr"=>"dir", "rtl"=>"dir"}, "h5"=> {"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align", "rtl"=>"dir"}, "iframe"=> {"0"=>"frameborder", 
module Traverse
  # Node-kind predicates.  Each one is true when the receiver is an instance
  # of something that mixes in the traversal module of that node class.

  def doc?
    Doc::Trav === self
  end

  def elem?
    Elem::Trav === self
  end

  def text?
    Text::Trav === self
  end

  def xmldecl?
    XMLDecl::Trav === self
  end

  def doctype?
    DocType::Trav === self
  end

  def procins?
    ProcIns::Trav === self
  end

  def comment?
    Comment::Trav === self
  end

  def bogusetag?
    BogusETag::Trav === self
  end

  # +get_subnode+ starts at the receiver and applies +get_subnode_internal+
  # once per given index, feeding each result into the next step.
  # It returns the node reached; with no indexes that is the receiver itself.
  def get_subnode(*indexes)
    indexes.inject(self) {|node, index| node.get_subnode_internal(index) }
  end
end
# Universal element name => attributes of that element whose value is a
# hyperlink.  This single table selects the elements to visit and the
# attributes to read, so the two can no longer get out of step.
HyperlinkAttributes = {
  '{http://www.w3.org/1999/xhtml}a' => ['href'],
  '{http://www.w3.org/1999/xhtml}area' => ['href'],
  '{http://www.w3.org/1999/xhtml}link' => ['href'],
  '{http://www.w3.org/1999/xhtml}base' => ['href'],
  '{http://www.w3.org/1999/xhtml}img' => ['src', 'longdesc', 'usemap'],
  '{http://www.w3.org/1999/xhtml}object' => ['classid', 'codebase', 'data', 'usemap'],
  '{http://www.w3.org/1999/xhtml}q' => ['cite'],
  '{http://www.w3.org/1999/xhtml}blockquote' => ['cite'],
  '{http://www.w3.org/1999/xhtml}ins' => ['cite'],
  '{http://www.w3.org/1999/xhtml}del' => ['cite'],
  '{http://www.w3.org/1999/xhtml}form' => ['action'],
  '{http://www.w3.org/1999/xhtml}input' => ['src', 'usemap'],
  '{http://www.w3.org/1999/xhtml}head' => ['profile'],
  '{http://www.w3.org/1999/xhtml}script' => ['src', 'for']
}.freeze

# +each_hyperlink_attribute+ visits every element listed in
# HyperlinkAttributes (via +traverse_element+) and yields
# element, attribute name and attribute value for each listed attribute
# that the element actually has.
#
# Attributes for which +get_attribute+ returns nil/false are skipped.
# An element whose name is not in the table is ignored instead of
# raising NoMethodError on a nil attribute list.
def each_hyperlink_attribute
  traverse_element(*HyperlinkAttributes.keys) {|elem|
    attrs = HyperlinkAttributes[elem.name]
    next unless attrs
    attrs.each {|attr|
      hyperlink = elem.get_attribute(attr)
      yield elem, attr, hyperlink if hyperlink
    }
  }
end
private :each_hyperlink_attribute
# +each_hyperlink_uri+ walks the hyperlink attributes reported by
# +each_hyperlink_attribute+ and yields each hyperlink together with a URI
# object built from it.
#
# The base URI starts as the _base_uri_ argument (a String is parsed first)
# and is replaced by the value of any xhtml +base+ element encountered.
# The +base+ hyperlink itself is not yielded.  All hyperlinks are collected
# before anything is yielded, so the base in effect is the final one.
# Without any base the hyperlink text is parsed on its own.
def each_hyperlink_uri(base_uri=nil) # :yields: hyperlink, uri
  base = String === base_uri ? URI.parse(base_uri) : base_uri
  base_pattern = %r{\{http://www.w3.org/1999/xhtml\}(?:base)\z}i
  targets = []
  each_hyperlink_attribute do |elem, _attr, hyperlink|
    if base_pattern =~ elem.name
      base = URI.parse(hyperlink.to_s)
      next
    end
    targets << hyperlink
  end
  targets.each do |hyperlink|
    reference = hyperlink.to_s
    yield hyperlink, (base ? base + reference : URI.parse(reference))
  end
end
# +filter+ rebuilds the tree without some components.
#
#   node.filter {|descendant_node| predicate } -> node
#   loc.filter {|descendant_loc| predicate } -> node
#
# The block is called for every child.  A false result drops the child.
# A true result keeps it; a kept element is itself filtered with the same
# block, any other kept child is reused unchanged.
#
# The result is built with <tt>to_node.subst_subnode</tt>, so it is what
# that call returns, not the receiver.
def filter(&block)
  replacements = {}
  each_child_with_index do |child, index|
    replacements[index] =
      if !(yield child)
        nil
      elsif child.elem?
        child.filter(&block)
      else
        child
      end
  end
  to_node.subst_subnode(replacements)
end
# +root+ returns the single element found among the top-level children.
# It raises HTree::Error ("no element") when no child is an element and
# HTree::Error ("multiple top elements") when more than one is.
def root
  top_elements = children.select {|child| child.elem? }
  case top_elements.length
  when 0
    raise HTree::Error, "no element"
  when 1
    top_elements.first
  else
    raise HTree::Error, "multiple top elements"
  end
end
# call-seq:
#   elem.fetch_attribute(name) -> text or raise IndexError
#   elem.fetch_attribute(name, default) -> text or default
#   elem.fetch_attribute(name) {|uname| default } -> text or default
#
# +fetch_attribute+ looks _uname_ up in +update_attribute_hash+ and returns
# the stored value.  A name object responding to +universal_name+ is
# converted to its universal name first.
#
# When the attribute is missing, the block result (the block receives the
# universal name) or the default argument is returned; with neither,
# IndexError is raised.  Passing more than one extra argument, or both a
# default and a block, raises ArgumentError.
def fetch_attribute(uname, *rest, &block)
  raise ArgumentError, "wrong number of arguments (#{1+rest.length} for 2)" if rest.length > 1
  raise ArgumentError, "block supersedes default value argument" if block_given? && !rest.empty?
  key = uname.respond_to?(:universal_name) ? uname.universal_name : uname
  update_attribute_hash.fetch(key) do
    next yield(key) if block_given?
    next rest.first unless rest.empty?
    raise IndexError, "attribute not found: #{key.inspect}"
  end
end
# +get_attr+ returns the value of the attribute _uname_ as a string, or nil
# when +update_attribute_hash+ has no entry for it.
#
# _uname_ may be a universal name or an object responding to
# +universal_name+; the latter is converted first, the same way
# +get_attribute+, +fetch_attribute+ and +fetch_attr+ do.  Without that
# conversion a name object was used as the lookup key directly, unlike in
# the sibling accessors.
def get_attr(uname)
  uname = uname.universal_name if uname.respond_to? :universal_name
  text = update_attribute_hash[uname]
  text ? text.to_s : nil
end
class Comment
  # :stopdoc:
  class << self
    alias_method :new!, :new
  end
  # :startdoc:

  # Builds a comment from arbitrary text.  Every run of two or more hyphens
  # is spread out with spaces ("--" becomes "- -") and a trailing hyphen
  # gets a space appended, so the result always passes the check in
  # +initialize+.  Use <tt>new!</tt> to construct without this rewriting.
  def self.new(content)
    spaced = content.gsub(/-(-+)/) { '-' + ' -' * Regexp.last_match(1).length }
    new!(spaced.sub(/-\z/, '- '))
  end

  # Stores _content_ as is.  Raises HTree::Error when it contains "--" or
  # ends with "-".
  def initialize(content) # :notnew:
    init_raw_string
    invalid = /--/ =~ content || /-\z/ =~ content
    raise HTree::Error, "invalid comment content: #{content.inspect}" if invalid
    @content = content
  end

  attr_reader :content
end
def raw_string
  # Keep our own reference to the accumulator: raw_string_internal only
  # appends to it, and its return value is not the string in every class
  # (Doc returns the result of @children.each, Elem returns nil when it has
  # no end tag).
  buffer = ''
  catch(:raw_string_tag) {
    raw_string_internal(buffer)
    return buffer
  }
  # Reached only when some node threw :raw_string_tag because it carries no
  # recorded source text.
  nil
end
class STag
  # Returns a fresh STag built from this tag's element name, attributes and
  # inherited context; the new object starts without a recorded raw string.
  def eliminate_raw_string
    # STag#initialize stores the element name in @name (exposed through
    # element_name); it never assigns @qualified_name, so reading that
    # variable here would pass nil as the name.
    STag.new(@name, @attributes, @inherited_context)
  end
end
/#{QuotedStartTag}|#{ValidStartTag}|#{InvalidStartTag}/ QuotedEmptyTag_C = %r{<(#{Name})((?:\s+#{QuotedAttr})*)\s*/>} QuotedEmptyTag = QuotedEmptyTag_C.disable_capture ValidEmptyTag_C = %r{<(#{Name})((?:\s+#{ValidAttr})*)\s*/>} ValidEmptyTag = ValidEmptyTag_C.disable_capture InvalidEmptyTag_C = %r{<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*/>} InvalidEmptyTag = InvalidEmptyTag_C.disable_capture EmptyTag = /#{QuotedEmptyTag}|#{ValidEmptyTag}|#{InvalidEmptyTag}/ EndTag_C = %r{} EndTag = EndTag_C.disable_capture XmlVersionNum = /[a-zA-Z0-9_.:-]+/ XmlVersionInfo_C = /\s+version\s*=\s*(?:'(#{XmlVersionNum})'|"(#{XmlVersionNum})")/ XmlVersionInfo = XmlVersionInfo_C.disable_capture XmlEncName = /[A-Za-z][A-Za-z0-9._-]*/ XmlEncodingDecl_C = /\s+encoding\s*=\s*(?:"(#{XmlEncName})"|'(#{XmlEncName})')/ XmlEncodingDecl = XmlEncodingDecl_C.disable_capture XmlSDDecl_C = /\s+standalone\s*=\s*(?:'(yes|no)'|"(yes|no)")/ XmlSDDecl = XmlSDDecl_C.disable_capture XmlDecl_C = /<\?xml#{XmlVersionInfo_C}#{XmlEncodingDecl_C}?#{XmlSDDecl_C}?\s*\?>/ XmlDecl = /<\?xml#{XmlVersionInfo}#{XmlEncodingDecl}?#{XmlSDDecl}?\s*\?>/ # xxx: internal DTD subset is not recognized: '[' (markupdecl | DeclSep)* ']' S?)? 
# Tokenizes +input+ and yields one [kind, frozen_string] pair per token, in
# the order the tokens occur.
#
# Kinds yielded: :xmldecl, :doctype, :procins, :stag, :etag, :emptytag,
# :comment, :text_pcdata, :text_cdata_section and :text_cdata_content.
#
# +is_xml+ is the initial assumption.  It is switched on when an XML
# declaration is seen, when a doctype has a public identifier starting with
# "-//W3C//DTD XHTML ", or when the first start tag is not one of the
# HTML-looking names tested below.
#
# Returns the two values is_xml and is_html.
def HTree.scan(input, is_xml=false)
  is_html = false
  # While cdata_content is non-nil it holds the name of the open element
  # whose content is collected verbatim into cdata_content_string.
  cdata_content = nil
  cdata_content_string = nil
  pcdata = ''
  first_element = true
  # Capture-group numbers of +pat+, in the order the groups are written.
  # NOTE(review): this relies on the interpolated Pat patterns contributing
  # no capture groups of their own -- confirm against Pat / disable_capture.
  index_otherstring = 1
  index_str = 2
  index_xmldecl = 3
  index_doctype = 4
  index_xmlprocins = 5
  index_quotedstarttag = 6
  index_quotedemptytag = 7
  index_starttag = 8
  index_endtag = 9
  index_emptytag = 10
  index_comment = 11
  index_cdata = 12
  index_end = 13
  # Group 1 is the text skipped before the next piece of markup; group 2 is
  # that markup (or the empty match at end of input).
  pat = /\G(.*?)((#{Pat::XmlDecl})
                |(#{Pat::DocType})
                |(#{Pat::XmlProcIns})
                |(#{Pat::QuotedStartTag})
                |(#{Pat::QuotedEmptyTag})
                |(#{Pat::StartTag})
                |(#{Pat::EndTag})
                |(#{Pat::EmptyTag})
                |(#{Pat::Comment})
                |(#{Pat::CDATA})
                |(\z))
        /oxm
  input.scan(pat) {
    match = $~
    if cdata_content
      # Verbatim mode: everything is text until the end tag whose name equals
      # cdata_content, or until the end of input.
      cdata_content_string << match[index_otherstring]
      str = match[index_str]
      if match[index_endtag] && str[Pat::Name] == cdata_content
        unless cdata_content_string.empty?
          yield [:text_cdata_content, HTree.frozen_string(cdata_content_string)]
        end
        yield [:etag, HTree.frozen_string(str)]
        cdata_content = nil
        cdata_content_string = nil
      elsif match[index_end]
        cdata_content_string << str
        unless cdata_content_string.empty?
          yield [:text_cdata_content, HTree.frozen_string(cdata_content_string)]
        end
        cdata_content = nil
        cdata_content_string = nil
      else
        # Any other markup inside the element is kept as part of its text.
        cdata_content_string << str
      end
    else
      # Normal mode: flush the text that preceded the markup, then classify
      # the markup itself.
      pcdata << match[index_otherstring]
      str = match[index_str]
      if !pcdata.empty?
        yield [:text_pcdata, HTree.frozen_string(pcdata)]
        pcdata = ''
      end
      if match[index_xmldecl]
        yield [:xmldecl, HTree.frozen_string(str)]
        is_xml = true
      elsif match[index_doctype]
        Pat::DocType_C =~ str
        root_element_name = $1
        public_identifier = $2 || $3
        #system_identifier = $4 || $5
        is_html = true if /\Ahtml\z/i =~ root_element_name
        is_xml = true if public_identifier && %r{\A-//W3C//DTD XHTML } =~ public_identifier
        yield [:doctype, HTree.frozen_string(str)]
      elsif match[index_xmlprocins]
        yield [:procins, HTree.frozen_string(str)]
      elsif match[index_starttag] || match[index_quotedstarttag]
        yield stag = [:stag, HTree.frozen_string(str)]
        tagname = str[Pat::Name]
        if first_element
          # The first start tag decides the document flavour.
          if /\A(?:html|head|title|isindex|base|script|style|meta|link|object)\z/i =~ tagname
            is_html = true
          else
            is_xml = true
          end
          first_element = false
        end
        # Outside XML mode, an element that ElementContent maps to :CDATA
        # switches the scanner into verbatim mode.
        if !is_xml && ElementContent[tagname] == :CDATA
          cdata_content = tagname
          cdata_content_string = ''
        end
      elsif match[index_endtag]
        yield [:etag, HTree.frozen_string(str)]
      elsif match[index_emptytag] || match[index_quotedemptytag]
        yield [:emptytag, HTree.frozen_string(str)]
        first_element = false
        #is_xml = true
      elsif match[index_comment]
        yield [:comment, HTree.frozen_string(str)]
      elsif match[index_cdata]
        yield [:text_cdata_section, HTree.frozen_string(str)]
      elsif match[index_end]
        # pass
      else
        # Every alternative of +pat+ is handled above, so this branch signals
        # an internal inconsistency.
        raise Exception, "unknown match [bug]"
      end
    end
  }
  return is_xml, is_html
end
# Builds the external-identifier text of the document type declaration from
# @public_identifier and @system_identifier.
def generate_content # :nodoc:
  pieces = [@public_identifier ? "PUBLIC \"#{@public_identifier}\"" : "SYSTEM"]
  # Although a system identifier is not omissible in XML,
  # we cannot output it if it is not given.
  if @system_identifier
    # Use single quotes only when the identifier itself contains a double quote.
    quote = /"/ =~ @system_identifier ? "'" : '"'
    pieces << "#{quote}#{@system_identifier}#{quote}"
  end
  pieces.join(' ')
end
# +subst+ substitutes several subtrees at once.
#
# +pairs+ is a hash or an array of [location, replacement] pairs.  Each
# location is converted to an index path relative to self with
# +index_list+; each replacement may be a single value or an array of values.
#
# * With no pairs, self is returned.
# * A pair whose location is self itself (empty index path) replaces the
#   whole node: after dropping nil entries exactly one replacement must
#   remain, and it is returned.  Such a pair cannot be combined with pairs
#   that point below self.
# * Otherwise the work is delegated to +subst_internal+.
#
# Raises ArgumentError for the invalid combinations described above.
def subst(pairs)
  pairs = pairs.map {|key, val|
    key = key.index_list(self)
    val = [val] unless Array === val
    [key, val]
  }

  pairs_empty_key, pairs_nonempty_key =
    pairs.partition {|key, val| key.empty? }
  if !pairs_empty_key.empty?
    if !pairs_nonempty_key.empty?
      raise ArgumentError, "cannot substitute a node under substituting tree."
    end
    result = []
    pairs_empty_key.each {|key, val| result.concat val }
    result.compact!
    if result.length == 1
      return result[0]
    else
      # Report the collected replacements themselves in the message.
      raise ArgumentError, "cannot substitute top node by multiple nodes: #{result.inspect}"
    end
  end
  return self if pairs_nonempty_key.empty?

  subst_internal(pairs)
end
# +top+ returns the originator location: the location reached by following
# +parent+ until a location without a parent is found (self if it has none).
def top
  current = self
  loop do
    upper = current.parent
    return current unless upper
    current = upper
  end
end
# +loc_list+ returns an array holding every location from the root location
# down to self, in that order.
def loc_list
  chain = []
  cursor = self
  while cursor
    # Walking upwards, so each ancestor goes in front of what was collected.
    chain.unshift(cursor)
    cursor = cursor.parent
  end
  chain
end
# +children+ returns an array of child locations, one per child of the node,
# each obtained with get_subnode(index).
def children
  Array.new(@node.children.length) { |position| get_subnode(position) }
end
end end htree-0.8/htree/gencode.rb0000644000175000017500000001110511747021106014533 0ustar jonasjonasrequire 'htree/encoder' require 'htree/output' # :stopdoc: module HTree module Node def generate_xml_output_code(outvar='out', contextvar='top_context') namespaces = HTree::Context::DefaultNamespaces.dup namespaces.default = nil context = Context.new(namespaces) gen = HTree::GenCode.new(outvar, contextvar) output(gen, context) gen.finish end end class GenCode def initialize(outvar, contextvar, internal_encoding=Encoder.internal_charset) @outvar = outvar @contextvar = contextvar @state = :none @buffer = '' @internal_encoding = internal_encoding @code = '' @html_output = nil end attr_reader :outvar, :contextvar def html_output? @html_output end def html_output=(flag) @html_output = flag end class CDATABuffer def initialize @buf = '' end def html_output? true end def not_valid_for_html_cdata(*args) raise ArgumentError, "CDATA content only accept texts." end alias output_slash_if_xml not_valid_for_html_cdata alias output_cdata_content not_valid_for_html_cdata alias output_dynamic_attvalue not_valid_for_html_cdata def output_string(string) @buf << string end def output_text(string) @buf << string end ChRef = { '&' => '&', '<' => '<', '>' => '>', '"' => '"', } def output_dynamic_text(string) if string.respond_to? :rcdata @buf << string.rcdata.gsub(/[<>]/) { ChRef[$&] } else @buf << string.to_s.gsub(/[&<>]/) { ChRef[$&] } end end def result if %r{[<>]} =~ @buf raise ArgumentError, "cdata contains non-text : #{@buf.inspect}" end str = HTree::Text.parse_pcdata(@buf).to_s if %r{ '&', '>' => '>', '<' => '<', '"' => '"', } def output_xmlns(namespaces) unless namespaces.empty? 
# Emits the pending @buffer as one generated statement appended to @code and
# empties the buffer.  The statement calls output_string or output_text on
# @outvar depending on @state; in any other state, or when the buffer is
# empty, nothing happens.
def flush_buffer
  return if @buffer.empty?
  emitter = { :string => 'output_string', :text => 'output_text' }[@state]
  return unless emitter
  @code << "#{@outvar}.#{emitter} #{@buffer.dump}\n"
  @buffer = ''
end
# Returns the child stored at the integer +index+, or nil when the index is
# negative or past the last child.  Raises TypeError for a non-integer index.
def get_subnode_internal(index) # :nodoc:
  raise TypeError, "invalid index: #{index.inspect}" unless index.is_a?(Integer)
  # Negative positions are rejected explicitly so that they never count
  # from the end of the array.
  return nil if index < 0
  @children[index]
end
# Pass-through converter: it hands text back unchanged while answering the
# converter calls made on it (primitive_convert, convert, finish).
class DummyEncodingConverter
  def initialize(encoding)
    @encoding = encoding
  end

  # Moves the whole of +src+ onto the end of +dst+, leaving +src+ empty, and
  # reports :source_buffer_empty.  The remaining parameters are accepted and
  # ignored.
  def primitive_convert(src, dst, destination_buffer=nil, destination_byteoffset=nil, destination_bytesize=nil, opts=nil)
    dst.concat(src)
    src.clear
    :source_buffer_empty
  end

  # Returns +str+ itself.
  def convert(str)
    str
  end

  # Nothing is ever buffered, so there is nothing left to flush.
  def finish
    ""
  end
end
# Runs +body+, wrapped by +pre+ (called without arguments) and +post+
# (called with +out+) when HTML output is enabled.  Always returns +out+.
def output_cdata_content_do(out, pre, body, post)
  # Read the flag once so that both wrappers follow the same decision.
  wrap = @html_output
  pre.call if wrap
  body.call
  post.call(out) if wrap
  out
end
# Escapes +string+ through the ChRef table and passes it to output_text.
#
# An object that responds to +rcdata+ contributes its rcdata with only "<"
# and ">" replaced; anything else is converted with +to_s+ and has "&", "<"
# and ">" replaced.
def output_dynamic_text(string)
  source, unsafe =
    if string.respond_to?(:rcdata)
      [string.rcdata, /[<>]/]
    else
      [string.to_s, /[&<>]/]
    end
  output_text(source.gsub(unsafe) { |char| ChRef[char] })
end
# Returns the first entry of @subcharset_list that is still a key of
# @subcharset_ic, or @output_encoding when there is none.
def minimal_charset
  @subcharset_list.find { |candidate| @subcharset_ic.include?(candidate) } || @output_encoding
end
class Name
  # Short textual form of the name:
  #
  #   xmlns / xmlns:local   for namespace declarations
  #   local                 when the namespace URI is nil or empty
  #   prefix{uri}local      when a prefix is set
  #   -{uri}local           when the prefix is exactly false
  #   {uri}local            otherwise
  def inspect
    if xmlns?
      return @local_name ? "xmlns:#{@local_name}" : "xmlns"
    end
    return @local_name if !@namespace_uri || @namespace_uri.empty?

    braced = "{#{@namespace_uri}}#{@local_name}"
    if @namespace_prefix
      "#{@namespace_prefix}#{braced}"
    elsif @namespace_prefix == false
      "-#{braced}"
    else
      braced
    end
  end
end
Name.new(nil, context.namespace_uri(nil), name) else Name.new(nil, '', name) end end def Name.parse_attribute_name(name, context) if name == 'xmlns' Name.new('xmlns', nil, nil) elsif /\Axmlns:/ =~ name Name.new('xmlns', nil, $') elsif /\{(.*)\}/ =~ name case $` when ''; Name.new(nil, $1, $') else Name.new($`, $1, $') end elsif /:/ =~ name && !context.namespace_uri($`).empty? Name.new($`, context.namespace_uri($`), $') else Name.new(nil, '', name) end end NameCache = {} def Name.new(namespace_prefix, namespace_uri, local_name) key = [namespace_prefix, namespace_uri, local_name, self] NameCache.fetch(key) { 0.upto(2) {|i| key[i] = key[i].dup.freeze if key[i] } NameCache[key] = super(key[0], key[1], key[2]) } end def initialize(namespace_prefix, namespace_uri, local_name) @namespace_prefix = namespace_prefix @namespace_uri = namespace_uri @local_name = local_name if @namespace_prefix && /\A#{Pat::Nmtoken}\z/o !~ @namespace_prefix raise HTree::Error, "invalid namespace prefix: #{@namespace_prefix.inspect}" end if @local_name && /\A#{Pat::Nmtoken}\z/o !~ @local_name raise HTree::Error, "invalid local name: #{@local_name.inspect}" end if @namespace_prefix == 'xmlns' unless @namespace_uri == nil raise HTree::Error, "Name object for xmlns:* must not have namespace URI: #{@namespace_uri.inspect}" end else unless String === @namespace_uri raise HTree::Error, "invalid namespace URI: #{@namespace_uri.inspect}" end end end attr_reader :namespace_prefix, :namespace_uri, :local_name def xmlns? @namespace_prefix == 'xmlns' && @namespace_uri == nil end def universal_name if @namespace_uri && !@namespace_uri.empty? "{#{@namespace_uri}}#{@local_name}" else @local_name.dup end end def qualified_name if @namespace_uri && !@namespace_uri.empty? if @namespace_prefix "#{@namespace_prefix}:#{@local_name}" else @local_name.dup end elsif @local_name @local_name.dup else "xmlns" end end def to_s if @namespace_uri && !@namespace_uri.empty? 
if @namespace_prefix "#{@namespace_prefix}{#{@namespace_uri}}#{@local_name}" else "{#{@namespace_uri}}#{@local_name}" end elsif @local_name @local_name.dup else "xmlns" end end end end htree-0.8/htree/parse.rb0000644000175000017500000003070011747021106014243 0ustar jonasjonasrequire 'htree/scan' require 'htree/htmlinfo' require 'htree/text' require 'htree/tag' require 'htree/leaf' require 'htree/doc' require 'htree/elem' require 'htree/raw_string' require 'htree/context' require 'htree/encoder' require 'htree/fstr' module HTree # HTree.parse parses input and return a document tree. # represented by HTree::Doc. # # input should be a String or # an object which respond to read or open method. # For example, IO, StringIO, Pathname, URI::HTTP and URI::FTP are acceptable. # Note that the URIs need open-uri. # # HTree.parse guesses input is HTML or not and XML or not. # # If it is guessed as HTML, the default namespace in the result is set to http://www.w3.org/1999/xhtml # regardless of input has XML namespace declaration or not nor even it is pre-XML HTML. # # If it is guessed as HTML and not XML, all element and attribute names are downcaseed. # # If opened file or read content has charset method, # HTree.parse decode it according to $KCODE before parsing. # Otherwise HTree.parse assumes the character encoding of the content is # compatible to $KCODE. # Note that the charset method is provided by URI::HTTP with open-uri. def HTree.parse(input) HTree.with_frozen_string_hash { parse_as(input, false) } end # HTree.parse_xml parses input as XML and # return a document tree represented by HTree::Doc. # # It behaves almost same as HTree.parse but it assumes input is XML # even if no XML declaration. # The assumption causes following differences. # * doesn't downcase element name. 
# * The content of ' assert_equal([ [:stag, ''], [:stag, ''], [:stag, ''], ], scan(s)) s = '\nd\n\ne" assert_equal([ [:stag, ""], [:text_pcdata, "a\n"], [:stag, ""], [:text_pcdata, "\nb\n"], [:stag, ""], [:text_pcdata, "\nd\n"], [:etag, ""], [:text_pcdata, "\ne"], ], scan(s)) end def test_eol_xml # In XML, line breaks are treated as part of content. # It's because KEEPRSRE is yes in XML. # http://www.satoshii.org/markup/websgml/valid-xml#keeprsre s = "a\n\nb\n\nc\n\nd\n\ne" assert_equal([ [:xmldecl, ""], [:text_pcdata, "a\n"], [:stag, ""], [:text_pcdata, "\nb\n"], [:stag, ""], [:text_pcdata, "\nc\n"], [:etag, ""], [:text_pcdata, "\nd\n"], [:etag, ""], [:text_pcdata, "\ne"], ], scan(s)) end def test_xml_html_detection assert_equal([false, true], HTree.scan("") {}) assert_equal([true, false], HTree.scan("") {}) assert_equal([true, true], HTree.scan('') {}) end def test_quoted_attr assert_equal([[:emptytag, '']], scan('')) end def test_bare_slash assert_equal([[:stag, '']], scan('')) assert_equal([[:stag, '']], scan('')) end end htree-0.8/test/test-rexml.rb0000644000175000017500000000416711747021106015115 0ustar jonasjonasrequire 'test/unit' require 'htree/parse' require 'htree/rexml' begin require 'rexml/document' rescue LoadError end class TestREXML < Test::Unit::TestCase def test_doc r = HTree.parse('').to_rexml assert_instance_of(REXML::Document, r) end def test_elem r = HTree.parse('').to_rexml assert_instance_of(REXML::Element, e = r.root) assert_equal('root', e.name) assert_equal('b', e.attribute('a').to_s) end def test_text r = HTree.parse('aaa').to_rexml assert_instance_of(REXML::Text, t = r.root.children[0]) assert_equal('aaa', t.to_s) end def test_xmldecl s = '' r = HTree.parse(s + 'aaa').to_rexml assert_instance_of(REXML::XMLDecl, x = r.children[0]) assert_equal('1.0', x.version) assert_equal(nil, x.standalone) assert_instance_of(REXML::XMLDecl, HTree.parse(s).children[0].to_rexml) end def test_doctype s = '' r = HTree.parse(s + 'xxx').to_rexml 
assert_instance_of(REXML::DocType, d = r.children[0]) assert_equal('html', d.name) assert_equal('PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"', d.external_id) assert_instance_of(REXML::DocType, HTree.parse(s).children[0].to_rexml) end def test_procins r = HTree.parse('').to_rexml assert_instance_of(REXML::Instruction, i = r.root.children[0]) assert_equal('xxx', i.target) assert_equal('yyy', i.content) assert_instance_of(REXML::Instruction, HTree.parse('').children[0].to_rexml) end def test_comment r = HTree.parse('').to_rexml assert_instance_of(REXML::Comment, c = r.root.children[0]) assert_equal(' zzz ', c.to_s) end def test_bogusetag assert_equal(nil, HTree.parse('').children[0].to_rexml) end def test_style assert_equal('', HTree.parse('').to_rexml.to_s[//]) end end if defined? REXML htree-0.8/test/test-context.rb0000644000175000017500000000202011747021106015434 0ustar jonasjonasrequire 'test/unit' require 'htree/context' class TestContext < Test::Unit::TestCase def test_namespaces_validation assert_raise(ArgumentError) { HTree::Context.new({1=>'u'}) } assert_raise(ArgumentError) { HTree::Context.new({''=>'u'}) } assert_raise(ArgumentError) { HTree::Context.new({'p'=>nil}) } assert_nothing_raised { HTree::Context.new({nil=>'u'}) } end def test_namespace_uri assert_equal('http://www.w3.org/XML/1998/namespace', HTree::Context.new.namespace_uri('xml')) assert_equal('u', HTree::Context.new({nil=>'u'}).namespace_uri(nil)) assert_equal('u', HTree::Context.new({'p'=>'u'}).namespace_uri('p')) assert_equal(nil, HTree::Context.new({'p'=>'u'}).namespace_uri('q')) end def test_subst_namespaces c1 = HTree::Context.new({'p'=>'u'}) c2 = c1.subst_namespaces({'q'=>'v'}) assert_equal('u', c1.namespace_uri('p')) assert_equal(nil, c1.namespace_uri('q')) assert_equal('u', c2.namespace_uri('p')) assert_equal('v', c2.namespace_uri('q')) end end htree-0.8/test/test-raw_string.rb0000644000175000017500000000072311747021106016137 0ustar jonasjonasrequire 
'test/unit' require 'htree' class TestRawString < Test::Unit::TestCase def test_elem t = HTree.parse("x") assert_equal("x", t.root.raw_string) assert_equal("x", t.root.raw_string) # raw_string shouldn't have side effect. end def test_no_raw_string t = HTree::Elem.new('a') assert_equal(nil, t.raw_string) t = HTree::Elem.new('a', HTree.parse("x").root) assert_equal(nil, t.raw_string) end end htree-0.8/test/test-elem-new.rb0000644000175000017500000000636311747021106015477 0ustar jonasjonasrequire 'test/unit' require 'htree/doc' require 'htree/elem' require 'htree/equality' require 'htree/traverse' class TestElemNew < Test::Unit::TestCase def test_empty e = HTree::Elem.new('a') assert_equal('a', e.qualified_name) assert_equal({}, e.attributes) assert_equal(HTree::DefaultContext, e.instance_variable_get(:@stag).inherited_context) assert_equal([], e.children) assert_equal(true, e.empty_element?) assert_nil(e.instance_variable_get(:@etag)) end def test_empty_array e = HTree::Elem.new('a', []) assert_equal('a', e.qualified_name) assert_equal({}, e.attributes) assert_equal(HTree::DefaultContext, e.instance_variable_get(:@stag).inherited_context) assert_equal([], e.children) assert_equal(false, e.empty_element?) assert_equal(nil, e.instance_variable_get(:@etag)) end def test_empty_attr e = HTree::Elem.new('a', {'href'=>'xxx'}) assert_equal('a', e.qualified_name) assert_equal({HTree::Name.parse_attribute_name('href', HTree::DefaultContext)=>HTree::Text.new('xxx')}, e.attributes) assert_equal(HTree::DefaultContext, e.instance_variable_get(:@stag).inherited_context) assert_equal([], e.children) assert_equal(true, e.empty_element?) 
assert_equal(nil, e.instance_variable_get(:@etag)) end def test_node t = HTree::Text.new('t') e = HTree::Elem.new('a', t) assert_equal({}, e.attributes) assert_equal([t], e.children) end def test_hash t = HTree::Text.new('t') e = HTree::Elem.new('a', {'b' => t}) assert_equal([['b', t]], e.attributes.map {|n,v| [n.universal_name, v] }) assert_equal([], e.children) end def test_string t = HTree::Text.new('s') e = HTree::Elem.new('a', "s") assert_equal({}, e.attributes) assert_equal([t], e.children) end def test_interleave t = HTree::Text.new('t') e = HTree::Elem.new('a', t, {'b' => t}, t, {'c' => 'd'}, t) assert_equal([['b', t], ['c', HTree::Text.new('d')]], e.attributes.map {|n,v| [n.universal_name, v] }.sort) assert_equal([t, t, t], e.children) end def test_nest t = HTree::Text.new('t') b = HTree::BogusETag.new('a') x = HTree::Elem.new('e', HTree::XMLDecl.new('1.0')) d = HTree::Elem.new('e', HTree::DocType.new('html')) e = HTree::Elem.new('a', [t, t, t, b, x, d]) assert_equal({}, e.attributes) assert_equal([t, t, t, b, x, d], e.children) end def test_err assert_raises(TypeError) { HTree::Elem.new('e', HTree::STag.new('a')) } assert_raises(TypeError) { HTree::Elem.new('e', HTree::ETag.new('a')) } end def test_context context = HTree::DefaultContext.subst_namespaces({'p'=>'u'}) elem = HTree::Elem.new('p:n', {'p:a'=>'t'}, context) assert_equal('{u}n', elem.name) assert_equal('t', elem.get_attr('{u}a')) assert_same(context, elem.instance_variable_get(:@stag).inherited_context) assert_raises(ArgumentError) { HTree::Elem.new('e', context, context) } end def test_hash_in_array attrs = [{'a'=>'1'}, {'a'=>'2'}] assert_raises(TypeError) { HTree::Elem.new('e', attrs) } attrs.pop assert_raises(TypeError) { HTree::Elem.new('e', attrs) } attrs.pop assert_equal([], attrs) assert_equal(false, HTree::Elem.new('e', attrs).empty_element?) 
end end htree-0.8/test/test-parse.rb0000644000175000017500000000762211747021106015077 0ustar jonasjonasrequire 'test/unit' require 'htree/parse' require 'htree/equality' require 'htree/traverse' class TestParse < Test::Unit::TestCase def test_empty assert_equal(HTree::Doc.new([]), HTree.parse_xml("").eliminate_raw_string) end def test_xmlns_default t1 = HTree::Doc.new([ HTree::Elem.new!( HTree::STag.new('x1', [['xmlns', 'bb']], HTree::DefaultContext.subst_namespaces({'xml'=>'http://www.w3.org/XML/1998/namespace'})), [HTree::Elem.new!(HTree::STag.new('x2', [], HTree::DefaultContext.subst_namespaces({nil => 'bb', 'xml'=>'http://www.w3.org/XML/1998/namespace'})), nil)]) ]) t2 = HTree.parse_xml('') assert_equal(t1, t2) end def test_doctype_root_element_name assert_equal('html', HTree.parse('').children[0].root_element_name) # xxx: should be downcased? assert_equal('HTML', HTree.parse('').children[1].root_element_name) end def test_doctype_system_identifier assert_equal('http://www.w3.org/TR/html4/loose.dtd', HTree.parse("").children[0].system_identifier) assert_equal('http://www.w3.org/TR/html4/loose.dtd', HTree.parse("").children[0].system_identifier) end def test_procins t = HTree.parse_xml("").children[0] assert_equal('x', t.target) assert_equal(nil, t.content) end def test_eol_html t1 = HTree::Elem.new('a', "\nb\n") s = "\nb\n" t2 = HTree.parse_xml(s).root assert_equal(t1, t2) assert_equal(s, t2.raw_string) end def test_parse_html t1 = HTree.parse("a") assert_equal("{http://www.w3.org/1999/xhtml}html", t1.root.element_name.universal_name) end def test_bare_url t1 = HTree::Elem.new('a', {'href'=>'http://host/'}) s = "" t2 = HTree.parse(s).root assert_equal(t1, t2) end def test_bare_slash t1 = HTree::Elem.new('n', {'a'=>'v/'}, 'x') s = "x" t2 = HTree.parse(s).root assert_equal(t1, t2) end def test_bare_slash_empty t1 = HTree::Elem.new('n', {'a'=>'v/'}) s = "" t2 = HTree.parse(s).root assert_equal(t1, t2) end def test_downcase 
assert_equal("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF", HTree.parse('').root.name) end def test_downcase_name # HTML && !XML assert_equal('html', HTree.parse('').root.element_name.local_name) assert_equal('html', HTree.parse('').root.element_name.local_name) # HTML && XML assert_equal('html', HTree.parse('').root.element_name.local_name) assert_equal('v', HTree.parse('').root.get_attr('{u}Y')) # !HTML && XML assert_equal('RDF', HTree.parse('').children[1].element_name.local_name) end def test_script_etag assert_equal(HTree::Doc.new(HTree::Elem.new('{http://www.w3.org/1999/xhtml}script', [])), HTree.parse('')) end def test_html_emptyelem t = HTree.parse('') assert_equal(HTree::Doc.new(HTree::Elem.new('{http://www.w3.org/1999/xhtml}html')), t) assert(!t.children[0].empty_element?) end def test_hr_emptyelem t = HTree.parse('
') assert_equal( HTree::Doc.new( HTree::Elem.new('{http://www.w3.org/1999/xhtml}html', HTree::Elem.new('{http://www.w3.org/1999/xhtml}hr'))), t) assert(t.children[0].children[0].empty_element?) end end htree-0.8/test/template.html0000644000175000017500000000011711747021106015154 0ustar jonasjonas dummy_title htree-0.8/test/test-extract_text.rb0000644000175000017500000000052311747021106016474 0ustar jonasjonasrequire 'test/unit' require 'htree/extract_text' require 'htree/equality' class TestExtractText < Test::Unit::TestCase def test_single n = HTree::Text.new('abc') assert_equal(n, n.extract_text) end def test_elem t = HTree::Text.new('abc') n = HTree::Elem.new('e', t) assert_equal(t, n.extract_text) end end htree-0.8/test/test-output.rb0000644000175000017500000001022511747021106015316 0ustar jonasjonasrequire 'test/unit' require 'htree' class TestOutput < Test::Unit::TestCase def gen(t, meth=:output, *rest) encoder = HTree::Encoder.new('US-ASCII', 'US-ASCII') t.__send__(meth, *(rest + [encoder, HTree::DefaultContext])) encoder.finish end def test_text assert_equal('a&<>"b', gen(HTree::Text.new('a&<>"b'))) assert_equal("abc&def", gen(HTree::Text.new("abc&def"))) assert_equal('"\'&', gen(HTree::Text.new('"\'&'))) assert_equal('"\'<&>', gen(HTree::Text.new('"\'<&>'))) end def test_text_attvalue assert_equal('"a&<>"b"', gen(HTree::Text.new('a&<>"b'), :output_attvalue)) assert_equal('"abc"', gen(HTree::Text.new("abc"), :output_attvalue)) assert_equal('"""', gen(HTree::Text.new('"'), :output_attvalue)) end def test_name assert_equal('abc', gen(HTree::Name.parse_element_name('abc', HTree::DefaultContext))) assert_equal('n', gen(HTree::Name.new(nil, 'u', 'n'))) assert_equal('p:n', gen(HTree::Name.new('p', 'u', 'n'))) assert_equal('n', gen(HTree::Name.new(nil, '', 'n'))) assert_equal('xmlns', gen(HTree::Name.new('xmlns', nil, nil))) assert_equal('xmlns:n', gen(HTree::Name.new('xmlns', nil, 'n'))) end def test_name_attribute assert_equal('abc="a&<>"b"', 
gen(HTree::Name.parse_element_name('abc', HTree::DefaultContext), :output_attribute, HTree::Text.new('a&<>"b'))) end def test_doc t = HTree::Doc.new(HTree::Elem.new('a'), HTree::Elem.new('b')) assert_equal("", gen(t)) end def test_elem t = HTree::Elem.new('a', []) assert_equal("", gen(t)) assert_equal("", gen(HTree::Elem.new!(HTree::STag.new('b')))) assert_equal("", gen(HTree::Elem.new!(HTree::STag.new('b'), []))) assert_equal("", gen(HTree::Elem.new!(HTree::STag.new('a'), [ HTree::Elem.new!(HTree::STag.new('b')), HTree::Elem.new!(HTree::STag.new('c')), HTree::Elem.new!(HTree::STag.new('d')) ]))) end def test_elem_empty t = HTree::Elem.new('a') assert_equal("", gen(t)) end def test_stag assert_equal("", gen(HTree::STag.new("name"), :output_stag)) assert_equal("", gen(HTree::STag.new("name"), :output_emptytag)) assert_equal("", gen(HTree::STag.new("name"), :output_etag)) assert_equal("", gen(HTree::STag.new("name", [["a", "b"]]), :output_emptytag)) assert_equal("", gen(HTree::STag.new("name", [['a', '<"\'>']]), :output_emptytag)) assert_equal("", gen(HTree::STag.new("ppp:nnn", [["xmlns", "uuu\"b"]]), :output_emptytag)) end def test_xmldecl t = HTree::XMLDecl.new('1.0', 'US-ASCII') assert_equal('', gen(t)) assert_equal('', gen(t, :output_prolog_xmldecl)) end def test_doctype t = HTree::DocType.new('html', '-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd') assert_equal('', gen(t)) end def test_procins t = HTree::ProcIns.new('xml-stylesheet', 'type="text/xml" href="#style1"') assert_equal('', gen(t)) t = HTree::ProcIns.new('x', nil) assert_equal('', gen(t)) end def test_comment t = HTree::Comment.new('xxx') assert_equal('', gen(t)) end end class TestHTMLOutput < Test::Unit::TestCase def test_top_xmlns assert_equal("aaa", HTree("aaa").display_html("")) end def test_script assert_equal("a < b", HTree("", HTree.expand_template('') {""}.gsub(/\n/, '')) end def test_xml_script v = "x", HTree.expand_template('') {""}.gsub(/\n/, '')) end def 
test_html_script_invalid_content v = "x"} } end def test_stylexxx v = "xx<y", HTree.expand_template('') {"ab"}.gsub(/\n/, '')) end end class TestCharset2 < Test::Unit::TestCase class CharsetString < String attr_accessor :charset end def with_kcode(kcode) if "".respond_to? :force_encoding if HTree::Encoder.internal_charset.start_with?(kcode.upcase) yield end else old = $KCODE begin $KCODE = kcode yield ensure $KCODE = old end end end def test_us_ascii with_kcode('E') { out = HTree.expand_template(CharsetString.new) { "abc" } assert_equal(out.charset, 'US-ASCII') } end def test_euc_jp with_kcode('E') { out = HTree.expand_template(CharsetString.new) { str = "\xa1\xa1" str.force_encoding("EUC-JP") if str.respond_to? :force_encoding str } assert_equal(out.charset, 'EUC-JP') } end def test_utf_8 with_kcode('U') { out = HTree.expand_template(CharsetString.new) { str = "\xc2\xa1" str.force_encoding("UTF-8") if str.respond_to? :force_encoding str } assert_equal(out.charset, 'UTF-8') } end end class TestTemplateDOCTYPE < Test::Unit::TestCase def test_html assert_equal( '', HTree.expand_template('') {''}.gsub(/\n/, '')) end end htree-0.8/README0000644000175000017500000000171011747021106012354 0ustar jonasjonas= htree - HTML/XML tree library htree provides a tree data structure which represent HTML and XML data. == Feature * Permissive unified HTML/XML parser * byte-to-byte round-tripping unparser * XML namespace support * Dedicated class for escaped string. This ease sanitization. * HTML/XHTML/XML generator * template engine * recursive template expansion * converter to REXML document == Home Page http://www.a-k-r.org/htree/ == Requirements * ruby : http://www.ruby-lang.org/ == Download * latest release: http://www.a-k-r.org/htree/htree-0.8.tar.gz * development version: https://github.com/akr/htree == Install % ruby install.rb == Reference Manual See rdoc/index.html or http://www.a-k-r.org/htree/rdoc/ == Usage Following two-line script convert HTML to XHTML. 
require 'htree' HTree(STDIN).display_xml The conversion method to REXML is provided as to_rexml. HTree(...).to_rexml == License Ruby's == Author Tanaka Akira htree-0.8/Makefile0000644000175000017500000000052411747021106013136 0ustar jonasjonasRUBY=ruby all: README rdoc/index.html README: misc/README.erb erb misc/README.erb > README check test: $(RUBY) -I. test-all.rb install: $(RUBY) install.rb .PHONY: check test all install RB = htree.rb htree/modules.rb $(wildcard htree/[a-l]*.rb) $(wildcard htree/[n-z]*.rb) rdoc/index.html: $(RB) rm -rf doc rdoc --op rdoc $(RB) htree-0.8/install.rb0000644000175000017500000000465711747021106013504 0ustar jonasjonas#!/usr/bin/env ruby # usage: ruby install.rb [-n] [--destdir=DESTDIR] # options: # -n : don't install # --destdir=DESTDIR # # Author: Tanaka Akira require 'optparse' require 'fileutils' def target_directory $:.each {|loc| if %r{/site_ruby/[\d.]+\z} =~ loc return loc end } raise "could not find target install directory" end CVS_FILES = {} def cvs_files(dir) return CVS_FILES[dir] if CVS_FILES.include? dir if File.directory? "#{dir}/CVS" result = {} File.foreach("#{dir}/CVS/Entries") {|line| case line when %r{\A/([^/]+)/} then result[$1] = true when %r{\AD/([^/]+)/} then result[$1] = true end } else result = nil end CVS_FILES[dir] = result result end def each_target(&block) target_set = {} cvs = cvs_files('.') Dir.glob("*.rb") {|filename| next if /\Atest-/ =~ filename next if /\Ainstall/ =~ filename next if cvs && !cvs.include?(filename) target_set[filename] = true yield filename each_require(filename, target_set, &block) } end def each_require(file, target_set, &block) File.foreach(file) {|line| next if /\A\s*require\s+['"]([^'"]+)['"]/ !~ line feature = $1 filename = "#{feature}.rb" next if target_set.include? 
filename next if !File.exist?(filename) target_set[filename] = true yield filename each_require(filename, target_set, &block) } end def collect_target result = [] each_target {|filename| result << filename } result.sort! result end def install_file(src, dst) ignore_exc(Errno::ENOENT) { return if FileUtils.compare_file src, dst } # check shadow ignore_exc(Errno::ENOENT) { File.unlink dst } FileUtils.mkdir_p(File.dirname(dst), :mode=>0755) FileUtils.cp(src, dst, :verbose => true) File.chmod(0644, dst) end def ignore_exc(exc) begin yield rescue exc end end $opt_n = false $opt_destdir = "" ARGV.options {|q| q.banner = 'ruby install.rb [opts]' q.def_option('--help', 'show this message') {puts q; exit(0)} q.def_option('-n', "don't install") { $opt_n = true } q.def_option('--destdir=DESTDIR', "specify DESTDIR") {|destdir| $opt_destdir = destdir } q.parse! } if $opt_n dir = target_directory collect_target.each {|filename| puts "-> #{$opt_destdir}#{dir}/#{filename}" } exit else File.umask 022 dir = target_directory collect_target.each {|filename| install_file filename, "#{$opt_destdir}#{dir}/#{filename}" } end htree-0.8/htree.rb0000644000175000017500000000521211747021106013131 0ustar jonasjonas# # = htree.rb # # HTML/XML document tree # # Author:: Tanaka Akira # # == Features # # - Permissive unified HTML/XML parser # - byte-to-byte round-tripping unparser # - XML namespace support # - Dedicated class for escaped string. This ease sanitization. # - XHTML/XML generator # - template engine: link:files/htree/template_rb.html # - recursive template expansion # - REXML tree generator: link:files/htree/rexml_rb.html # # == Example # # The following one-liner prints parsed tree object. # # % ruby -rhtree -e 'pp HTree(ARGF)' html-file # # The following two-line script convert HTML to XHTML. # # require 'htree' # HTree(STDIN).display_xml # # The conversion method to REXML is provided as to_rexml. 
# # HTree(...).to_rexml # # == Module/Class Hierarchy # # * HTree # * HTree::Name # * HTree::Context # * HTree::Location # * HTree::Node # * HTree::Doc # * HTree::Elem # * HTree::Text # * HTree::XMLDecl # * HTree::DocType # * HTree::ProcIns # * HTree::Comment # * HTree::BogusETag # * HTree::Error # # == Method Summary # # HTree provides following methods. # # - Parsing Methods # - HTree(html_string) -> HTree::Doc # - HTree.parse(input) -> HTree::Doc # # - Generation Methods # - HTree::Node#display_xml -> STDOUT # - HTree::Node#display_xml(out) -> out # - HTree::Node#display_xml(out, encoding) -> out # - HTree::Text#to_s -> String # # - Template Methods # - HTree.expand_template{template_string} -> STDOUT # - HTree.expand_template(out){template_string} -> out # - HTree.expand_template(out, encoding){template_string} -> out # - HTree.compile_template(template_string) -> Module # - HTree{template_string} -> HTree::Doc # # - Traverse Methods # - HTree::Elem#attributes -> Hash[HTree::Name -> HTree::Text] # - HTree::Elem::Location#attributes -> Hash[HTree::Name -> HTree::Location] # # - Predicate Methods # - HTree::Traverse#doc? -> true or false # - HTree::Traverse#elem? -> true or false # - HTree::Traverse#text? -> true or false # - HTree::Traverse#xmldecl? -> true or false # - HTree::Traverse#doctype? -> true or false # - HTree::Traverse#procins? -> true or false # - HTree::Traverse#comment? -> true or false # - HTree::Traverse#bogusetag? -> true or false # # - REXML Tree Generator # - HTree::Node#to_rexml -> REXML::Child require 'htree/parse' require 'htree/extract_text' require 'htree/equality' require 'htree/inspect' require 'htree/display' require 'htree/loc' require 'htree/traverse' require 'htree/template' require 'htree/rexml' htree-0.8/.cvsignore0000644000175000017500000000003311747021106013471 0ustar jonasjonasindex.html README.html doc