} {emptyelem }}
#
def subst_subnode(pairs)
  # Returns a new Elem in which attributes and children named by +pairs+
  # are replaced.
  #
  # Each pair is [index, value]:
  # - index is a Name (attribute), an Integer (child position) or a String
  #   (parsed as an attribute name in DefaultContext).
  # - value is a Node, a String, an Array of those, an HTree::Location
  #   pointing to one of those, or nil (meaning "remove").
  #
  # Raises TypeError for any other index or value.
  replacements = {}
  pairs.each do |index, value|
    case index
    when Name, Integer
      # usable as given
    when String
      index = Name.parse_attribute_name(index, DefaultContext)
    else
      raise TypeError, "invalid index: #{index.inspect}"
    end

    value = value.to_node if HTree::Location === value
    nodes =
      case value
      when Node, String
        [value]
      when Array
        value.dup
      when nil
        []
      else
        raise TypeError, "invalid value: #{value.inspect}"
      end

    # Normalize every entry to a Node; plain strings become Text nodes.
    nodes.map! do |v|
      v = v.to_node if HTree::Location === v
      case v
      when Node
        v
      when String
        Text.new(v)
      else
        raise TypeError, "invalid value: #{v.inspect}"
      end
    end

    # The same index may appear several times; its values accumulate.
    (replacements[index] ||= []).concat(nodes)
  end

  # Existing attributes keep their position; a replacement with no
  # values drops the attribute.
  attrs = []
  @stag.attributes.each do |name, text|
    unless replacements.include?(name)
      attrs << {name=>text}
      next
    end
    texts = replacements.delete(name)
    attrs << {name=>Text.concat(*texts)} unless texts.empty?
  end

  # Remaining Name keys are attributes the start tag did not have yet.
  replacements.keys.each do |key|
    next unless Name === key
    texts = replacements.delete(key)
    attrs << {key=>Text.concat(*texts)} unless texts.empty?
  end

  # Only Integer keys are left: negative ones are prepended, ones past
  # the end are appended, the rest overwrite the child at that position.
  leading = []
  middle = @children.dup
  trailing = []
  replacements.keys.sort.each do |index|
    nodes = replacements[index]
    if index < 0
      leading << nodes
    elsif middle.length <= index
      trailing << nodes
    else
      middle[index] = nodes
    end
  end
  children = [leading, middle, trailing].flatten

  # An element that was empty and still has no children is rebuilt
  # without a children argument.
  elem_args = [@stag.element_name, @stag.context]
  elem_args << children unless children.empty? && @empty
  Elem.new(*(elem_args + attrs))
end
end
module Elem::Trav
  private
  # Returns a Hash from each attribute's universal name to its text,
  # built once via each_attribute and cached in @attribute_hash.
  def update_attribute_hash
    return @attribute_hash if defined?(@attribute_hash)
    by_universal_name = {}
    each_attribute do |name, text|
      by_universal_name[name.universal_name] = text
    end
    @attribute_hash = by_universal_name
  end
end
end
htree-0.8/htree/text.rb 0000644 0001750 0001750 00000006223 11747021106 014120 0 ustar jonas jonas require 'htree/modules'
require 'htree/raw_string'
require 'htree/htmlinfo'
require 'htree/encoder'
require 'htree/fstr'
if !"".respond_to?(:encode)
require 'iconv'
end
module HTree
class Text
  # :stopdoc:
  class << self
    alias new_internal new
  end
  # :startdoc:

  # Builds a Text from +arg+.
  #
  # - A Text (or an HTree::Location pointing to one) is copied as is.
  # - A String is treated as plain text: every "&" is escaped to "&amp;"
  #   so that the stored rcdata never contains an unintended character
  #   reference.
  #
  # Raises TypeError for any other argument.
  def Text.new(arg)
    arg = arg.to_node if HTree::Location === arg
    if Text === arg
      new_internal arg.rcdata, arg.normalized_rcdata
    elsif String === arg
      arg2 = arg.gsub(/&/, '&amp;')
      # Keep the caller's string object when nothing had to be escaped.
      arg = arg2.freeze if arg != arg2
      new_internal arg
    else
      raise TypeError, "cannot initialize Text with #{arg.inspect}"
    end
  end

  # +rcdata+ is text that may contain character references.
  # +normalized_rcdata+ defaults to the normalized form of +rcdata+; when
  # both are equal the same string object is shared.
  def initialize(rcdata, normalized_rcdata=internal_normalize(rcdata)) # :notnew:
    init_raw_string
    @rcdata = rcdata && HTree.frozen_string(rcdata)
    @normalized_rcdata = @rcdata == normalized_rcdata ? @rcdata : normalized_rcdata
  end
  attr_reader :rcdata, :normalized_rcdata

  def internal_normalize(rcdata)
    # - character references are decoded as much as possible.
    # - undecodable character references are converted to decimal numeric character references.
    result = rcdata.gsub(/&(?:#([0-9]+)|#x([0-9a-fA-F]+)|([A-Za-z][A-Za-z0-9]*));/o) {|s|
      u = nil
      if $1
        u = $1.to_i
      elsif $2
        u = $2.hex
      elsif $3
        u = NamedCharacters[$3]
      end
      if !u || u < 0 || 0x7fffffff < u
        '?'
      elsif u == 38 # '&' character.
        # The ampersand must stay a reference, otherwise the normalized
        # text could be read as the start of another reference.
        '&#38;'
      elsif u <= 0x7f
        [u].pack("C")
      else
        us = [u].pack("U")
        if us.respond_to? :encode
          us.encode(Encoder.internal_charset, :xml=>:text)
        else
          begin
            Iconv.conv(Encoder.internal_charset, 'UTF-8', us)
          rescue Iconv::Failure
            # Not representable in the internal charset: keep it as a
            # decimal numeric character reference.
            "&##{u};"
          end
        end
      end
    }
    HTree.frozen_string(result)
  end
  private :internal_normalize

  # HTree::Text#to_s converts the text to a string.
  # - character references are decoded as much as possible.
  # - undecodable character reference are converted to `?' character.
  def to_s
    @normalized_rcdata.gsub(/&(?:#([0-9]+));/o) {|s|
      u = $1.to_i
      if 0 <= u && u <= 0x7f
        [u].pack("C")
      else
        '?'
      end
    }
  end

  # True when the normalized text is the empty string.
  def empty?
    @normalized_rcdata.empty?
  end

  # Returns a Text with leading and trailing whitespace removed, or self
  # when there is nothing to remove.
  def strip
    rcdata = @normalized_rcdata.dup
    # NOTE(review): the second alternative duplicates \s as written; it may
    # originally have been a no-break space or an entity -- confirm upstream.
    rcdata.sub!(/\A(?:\s| )+/, '')
    rcdata.sub!(/(?:\s| )+\z/, '')
    if rcdata == @normalized_rcdata
      self
    else
      rcdata.freeze
      Text.new_internal(rcdata, rcdata)
    end
  end

  # HTree::Text.concat returns a text which is concatenation of arguments.
  #
  # An argument should be one of follows.
  # - String
  # - HTree::Text
  # - HTree::Location which points HTree::Text
  #
  # Strings are plain text, so their "&" is escaped to "&amp;"; the rcdata
  # of Text arguments is appended unchanged.
  def Text.concat(*args)
    rcdata = ''.dup
    args.each {|arg|
      arg = arg.to_node if HTree::Location === arg
      if Text === arg
        rcdata << arg.rcdata
      else
        rcdata << arg.gsub(/&/, '&amp;')
      end
    }
    new_internal rcdata
  end
end
end
htree-0.8/htree/context.rb 0000644 0001750 0001750 00000003710 11747021106 014616 0 ustar jonas jonas module HTree
class Context
  # :stopdoc:
  DefaultNamespaces = {'xml'=>'http://www.w3.org/XML/1998/namespace'}
  DefaultNamespaces.default = ""
  DefaultNamespaces.freeze
  # :startdoc:

  # Creates a namespace context.
  #
  # +namespaces+ is a hash or nil; nil selects
  # HTree::Context::DefaultNamespaces.
  #
  # Keys are nil (the default namespace) or a non-empty prefix string.
  # Values are namespace URI strings; "" means an unbound namespace.
  #
  # Raises ArgumentError for any other key or value. An unfrozen hash is
  # copied and the copy is frozen, so the caller's hash is not modified.
  def initialize(namespaces=nil)
    namespaces ||= DefaultNamespaces
    namespaces.each_pair do |prefix, uri|
      check_namespace_prefix(prefix)
      check_namespace_uri(uri)
    end
    @namespaces = namespaces.frozen? ? namespaces : namespaces.dup.freeze
  end
  attr_reader :namespaces

  # Returns the namespace URI bound to _prefix_.
  # For an unknown prefix the underlying hash's default is returned:
  # "" for contexts derived from DefaultNamespaces, nil for a plain hash.
  def namespace_uri(prefix)
    @namespaces[prefix]
  end

  # Returns a Context whose bindings are this context's bindings overridden
  # by _declared_namespaces_. Returns self when nothing changes.
  def subst_namespaces(declared_namespaces)
    merged = @namespaces.dup
    declared_namespaces.each do |prefix, uri|
      check_namespace_prefix(prefix)
      check_namespace_uri(uri)
      merged[prefix] = uri
    end
    return self if merged == @namespaces
    Context.new(merged)
  end

  private

  # A prefix is valid when it is a non-empty String or nil.
  def check_namespace_prefix(k)
    return if String === k && !k.empty?
    return if k == nil
    raise ArgumentError, "invalid namespace prefix: #{k.inspect}"
  end

  # A namespace URI must be a String.
  def check_namespace_uri(v)
    return if String === v
    raise ArgumentError, "invalid namespace URI: #{v.inspect}"
  end
end
# :stopdoc:
# Context built from Context::DefaultNamespaces (only the 'xml' prefix bound).
DefaultContext = Context.new
# DefaultContext with the XHTML namespace bound as the default (nil prefix) namespace.
HTMLContext = DefaultContext.subst_namespaces(nil=>"http://www.w3.org/1999/xhtml")
# :startdoc:
end
htree-0.8/htree/equality.rb 0000644 0001750 0001750 00000011277 11747021106 014776 0 ustar jonas jonas require 'htree/doc'
require 'htree/elem'
require 'htree/leaf'
require 'htree/tag'
require 'htree/raw_string'
require 'htree/context'
module HTree
# compare tree structures.
# Delegates to check_equality using each side's usual_equal_object.
# eql? is the same method, so it agrees with #hash, which is also derived
# from usual_equal_object.
def ==(other)
check_equality(self, other, :usual_equal_object)
end
alias eql? ==
# hash value for the tree structure.
# Computed from usual_equal_object on first use and cached in @hash_code.
def hash
  if defined? @hash_code
    @hash_code
  else
    @hash_code = usual_equal_object.hash
  end
end
# :stopdoc:
# Returns the value used for ordinary (==) comparison.
# It is built by make_usual_equal_object on first use and cached.
def usual_equal_object
  unless defined? @usual_equal_object
    @usual_equal_object = make_usual_equal_object
  end
  @usual_equal_object
end
# Hook that builds the value compared by ==.
# This default raises NotImplementedError; the node classes reopened in
# this file provide their own version.
def make_usual_equal_object
raise NotImplementedError
end
# Returns the value used for exact comparison (exact_equal?).
# It is built by make_exact_equal_object on first use and cached.
def exact_equal_object
  unless defined? @exact_equal_object
    @exact_equal_object = make_exact_equal_object
  end
  @exact_equal_object
end
# Hook that builds the value compared by exact_equal?.
# This default raises NotImplementedError; the node classes reopened in
# this file provide their own version.
def make_exact_equal_object
raise NotImplementedError
end
# Like ==, but compares the exact_equal_object of both sides instead of
# the usual_equal_object.
def exact_equal?(other)
check_equality(self, other, :exact_equal_object)
end
# Recursively compares +obj1+ and +obj2+.
#
# - Objects of different classes are never equal.
# - Arrays are equal when they have the same length and every pair of
#   elements is equal under this same rule.
# - Objects responding to +equal_object_method+ are compared through the
#   values that method returns.
# - Anything else is compared with ==.
def check_equality(obj1, obj2, equal_object_method)
  return false unless obj1.class == obj2.class

  if obj1.class == Array
    return false unless obj1.length == obj2.length
    element_pairs = obj1.zip(obj2)
    # Cheap pass first: reject on any class mismatch before recursing.
    return false unless element_pairs.all? {|left, right| left.class == right.class }
    element_pairs.all? {|left, right| check_equality(left, right, equal_object_method) }
  elsif obj1.respond_to? equal_object_method
    check_equality(obj1.send(equal_object_method),
                   obj2.send(equal_object_method),
                   equal_object_method)
  else
    obj1 == obj2
  end
end
# A document is compared purely by its children, for both exact and
# usual equality.
class Doc
alias exact_equal_object children
alias usual_equal_object children
end
class Elem
# Exact comparison covers the start tag, children, the empty flag and the end tag.
def make_exact_equal_object
[@stag, @children, @empty, @etag]
end
# Usual comparison covers only the start tag and the children.
def make_usual_equal_object
[@stag, @children]
end
end
class Name
  # Exact comparison uses prefix, namespace URI and local name.
  def make_exact_equal_object
    [@namespace_prefix, @namespace_uri, @local_name]
  end

  # Usual comparison ignores the prefix. A name for which xmlns? is true
  # is represented by its local name alone.
  def make_usual_equal_object
    if xmlns?
      @local_name
    else
      [@namespace_uri, @local_name]
    end
  end
end
module Util
  module_function

  # Three-way comparison that also accepts nil: nil equals nil and sorts
  # before everything else; two non-nil values are compared with <=>.
  def cmp_with_nil(a, b)
    return (b == nil ? 0 : -1) if a == nil
    return 1 if b == nil
    a <=> b
  end
end
class Context
  # Returns [prefix, uri] pairs ordered by prefix, with the nil (default
  # namespace) prefix first.
  def make_exact_equal_object
    ordered_prefixes = @namespaces.keys.sort {|prefix1, prefix2|
      Util.cmp_with_nil(prefix1, prefix2)
    }
    ordered_prefixes.map {|prefix| [prefix, @namespaces[prefix]] }
  end
  # make_usual_equal_object is not used through STag#make_usual_equal_object
  # NotImplementedError is suitable?
  alias make_usual_equal_object make_exact_equal_object
end
class STag
  # Exact comparison: raw source, element name, every attribute (in a
  # canonical order) and the inherited context.
  def make_exact_equal_object
    ordered = @attributes.sort {|(n1, _), (n2, _)| compare_attribute_names(n1, n2) }
    [@raw_string, @name, ordered, @inherited_context]
  end

  # Usual comparison: element name and the attributes whose name is not an
  # xmlns declaration, in the same canonical order.
  def make_usual_equal_object
    plain = @attributes.find_all {|n, t| !n.xmlns? }
    [@name, plain.sort {|(n1, _), (n2, _)| compare_attribute_names(n1, n2) }]
  end

  # Orders attribute names by prefix, then namespace URI, then local name;
  # nil sorts first in each field.
  def compare_attribute_names(n1, n2)
    order = Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix)
    return order if order.nonzero?
    order = Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri)
    return order if order.nonzero?
    Util.cmp_with_nil(n1.local_name, n2.local_name)
  end
  private :compare_attribute_names
end
class ETag
# Exact comparison uses the raw source text and the qualified name.
def make_exact_equal_object
[@raw_string, @qualified_name]
end
# Usual comparison uses the qualified name only.
alias usual_equal_object qualified_name
end
class Text
# Exact comparison uses the raw source text and the rcdata as written.
def make_exact_equal_object
[@raw_string, @rcdata]
end
# Usual comparison uses the normalized rcdata.
def make_usual_equal_object
@normalized_rcdata
end
end
class XMLDecl
# Exact comparison additionally includes the raw source text.
def make_exact_equal_object
[@raw_string, @version, @encoding, @standalone]
end
# Usual comparison uses version, encoding and standalone.
def make_usual_equal_object
[@version, @encoding, @standalone]
end
end
class DocType
# Exact comparison additionally includes the raw source text.
def make_exact_equal_object
[@raw_string, @root_element_name, @system_identifier, @public_identifier]
end
# Usual comparison uses the root element name and both identifiers.
def make_usual_equal_object
[@root_element_name, @system_identifier, @public_identifier]
end
end
class ProcIns
# Exact comparison additionally includes the raw source text.
def make_exact_equal_object
[@raw_string, @target, @content]
end
# Usual comparison uses the target and the content.
def make_usual_equal_object
[@target, @content]
end
end
class Comment
# Exact comparison uses the raw source text and the content.
def make_exact_equal_object
[@raw_string, @content]
end
# Usual comparison uses the content only.
alias usual_equal_object content
end
class BogusETag
# Both comparisons use only the wrapped end tag.
def make_exact_equal_object
[@etag]
end
# usual_equal_object is aliased directly to the builder, so it returns a
# fresh array on every call instead of a cached one.
alias usual_equal_object make_exact_equal_object
end
class Location
# Both comparisons use the parent, the index and the node.
def make_exact_equal_object
[@parent, @index, @node]
end
# usual_equal_object is aliased directly to the builder, so it returns a
# fresh array on every call instead of a cached one.
alias usual_equal_object make_exact_equal_object
end
# :startdoc:
end
htree-0.8/htree/regexp-util.rb 0000644 0001750 0001750 00000001125 11747021106 015375 0 ustar jonas jonas class Regexp
def disable_capture
  # Returns a copy of this regexp in which every plain "(" group is
  # rewritten as a non-capturing "(?:" group. Escaped characters, groups
  # that already start with "(?" and parentheses inside a character class
  # are copied unchanged.
  rebuilt = ''
  inside_class = false
  self.source.scan(/\\.|[^\\\(\[\]]+|\(\?|\(|\[|\]/m) do |token|
    if token == '('
      rebuilt << (inside_class ? '(' : '(?:')
    else
      inside_class = true if token == '['
      inside_class = false if token == ']'
      rebuilt << token
    end
  end
  # Strings without force_encoding belong to interpreters where the
  # regexp carries a kcode instead of an encoding.
  unless rebuilt.respond_to? :force_encoding
    return Regexp.new(rebuilt, self.options, self.kcode)
  end
  rebuilt.force_encoding(self.encoding)
  Regexp.new(rebuilt, self.options)
end
end
htree-0.8/htree/template.rb 0000644 0001750 0001750 00000075346 11747021106 014763 0 ustar jonas jonas # = Template Engine
#
# The htree template engine converts HTML and some data to HTML or XML.
#
# == Template Method Summary
#
# - HTree.expand_template(template_pathname ) -> $stdout
# - HTree.expand_template(template_pathname , obj ) -> $stdout
# - HTree.expand_template(template_pathname , obj , out ) -> out
# - HTree.expand_template(template_pathname , obj , out , encoding ) -> out
#
# - HTree.expand_template{template_string } -> $stdout
# - HTree.expand_template(out ) {template_string } -> out
# - HTree.expand_template(out , encoding ) {template_string } -> out
#
# - HTree.compile_template(template_string ) -> Module
# - HTree{template_string } -> HTree::Doc
#
# Note that the following method, HTree(), is not a template method.
#
# - HTree(html_string ) -> HTree::Doc
#
# == Template Directives.
#
# A template directive is described as a special HTML attribute which name
# begins with underscore.
#
# The template directives are listed as follows.
#
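# - <elem _attr_name="expr">content</elem>
# - <elem _text="expr">dummy-content</elem>
# - <elem _text>expr</elem>
# - <elem _tree="expr">dummy-content</elem>
# - <elem _tree>expr</elem>
# - <elem _if="expr" _else="mod.name(args)">then-content</elem>
# - <elem _iter="expr.meth(args)//vars">content</elem>
# - <elem _iter_content="expr.meth(args)//vars">content</elem>
# - <elem _call="mod.name(args)">dummy-content</elem>
# - <elem _template="name(vars)">body</elem>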
#
# === Template Semantics
#
# - attribute substitution
# - <elem _attr_name="expr">content</elem>
#
# \_attr_name is used for a dynamic attribute.
#
#
# ->
#
# It is expanded to name ="content".
# The content is generated by evaluating _expr_.
# Usually you don't need to care escaping: &, <, > and " are automatically escaped.
# If you need to output character references,
# the value of _expr_ should be an object which have a +rcdata+ method such as an HTree::Text.
# If the value has a +rcdata+ method,
# it is called and the result is used as the content with escaping <, > and ".
#
# \_attr_name can be used multiple times in single element.
#
# - text substitution
# - <elem _text="expr">dummy-content</elem>
# - <elem _text>expr</elem>
#
# _text substitutes the content of the element by the string
# evaluated from _expr_.
# _expr_ is described in the attribute value or the content of the element.
#
# If a result of _expr_ have &, < and/or >, they are automatically escaped.
# If you need to output character references,
# the value of _expr_ should be an object which have a +rcdata+ method such as an HTree::Text.
# If the value has a +rcdata+ method,
# it is called and the result is used as the content with escaping < and >.
#
# If the element is span or div, and there is no other attributes,
# no tags are produced.
#
# dummy-content
# -> ...
#
# - tree substitution
# - <elem _tree="expr">dummy-content</elem>
# - <elem _tree>expr</elem>
#
# _tree substitutes the content of the element by the htree object
# evaluated from _expr_.
# _expr_ is described in the attribute value or the content of the element.
#
# If the element is span or div, and there is no other attributes,
# no tags are produced.
#
# dummy-content
# -> ...
#
# - conditional
# - <elem _if="expr">then-content</elem>
# - <elem _if="expr" _else="name(args)">then-content</elem>
#
# _if is used for conditional.
#
# If expr is evaluated to true, it expands as follows
# regardless of existence of _else.
#
# then-content
# -> then-content
#
# If expr is evaluated to false, it expands using _else.
# If _else is not given, it expands to empty.
# If _else is given, it expands as follows.
#
# then-content
# -> then-content
# -> see _call for further expansion.
#
# It is expanded to then-content if _expr_ is evaluated to
# a true value.
# Otherwise, it is replaced by other template specified by _else attribute.
# If _else attribute is not given, it just replaced by empty.
#
# - iteration
# - <elem _iter="expr.meth(args)//vars">content</elem>
# - <elem _iter_content="expr.meth(args)//vars">content</elem>
#
# _iter and _iter_content is used for iteration.
# _iter iterates the element itself but _iter_content iterates the content.
#
#
# -> ...
#
#
# -> ...
#
# expr.meth(args) specifies iterator method call.
# It is actually called with a block.
# The block have block parameters vars .
# vars must be variables separated by comma.
#
# - template call
# - <elem _call="name(args)">dummy-content</elem>
# - <elem _call="mod.name(args)">dummy-content</elem>
#
# _call is used to expand a template function.
# The template function is defined by _template.
#
# ...
# ...
# -> ...
#
# A local template can be called as follows:
#
# HTree.expand_template{<<'End'}
# [ruby-talk:nnn ]
# Ruby 1.8.0 is released at .
# Ruby 1.8.1 is released at .
# End
#
# mod should be the result of HTree.compile_template.
#
# M = HTree.compile_template(<<'End')
# [ruby-talk:nnn ]
# End
# HTree.expand_template{<<'End'}
#
# Ruby 1.8.0 is released at .
# Ruby 1.8.1 is released at .
#
# End
#
# The module can be included.
# In such case, the template function can be called without mod.
# prefix.
#
# include HTree.compile_template(<<'End')
# [ruby-talk:nnn ]
# End
# HTree.expand_template{<<'End'}
#
# Ruby 1.8.0 is released at .
# Ruby 1.8.1 is released at .
#
# End
#
# - template definition
# - <elem _template="name(vars)">body</elem>
#
# _template defines a template function which is usable by _call.
#
# When a template is compiled to a module by HTree.compile_template,
# the module have a module function for each template function
# defined by outermost _template attribute.
#
# === White Space Handling
#
# The htree template engine strips whitespace text nodes in a template
# except under HTML pre element.
#
# For example the white space text node between two spans in following template is stripped.
#
# -> "ab"
#
# Character entity references are not stripped.
#
# -> "a b"
#
# Text nodes generated by _text is not stripped.
#
# -> "a b"
#
# == HTML and XML
#
# The htree template engine outputs HTML or XML.
#
# If a template has no XML declaration and the top element is HTML,
# the result is HTML.
# Otherwise the result is XML.
#
# They differ as follows.
#
# - The XML declaration is (re-)generated for XML.
# - Empty elements end with a slash for XML.
# - The content of script and style elements is escaped for XML.
#
# == Design Decision on Design/Logic Separation
#
# HTree template engine doesn't force you to separate design and logic.
# Any logic (Ruby code) can be embedded in design (HTML).
#
# However the template engine cares the separation by logic refactorings.
# The logic is easy to move between a template and an application.
# For example, following tangled template
#
# tmpl.html:
#
#
# dummy
#
# ...
#
#
# app.rb:
# HTree.expand_template('tmpl.html', obj)
#
# can be refactored as follows.
#
# tmpl.html:
#
#
# dummy
#
# ...
#
#
# app.rb:
# def obj.title
# very-complex-ruby-code
# end
# HTree.expand_template('tmpl.html', obj)
#
# In general, any expression in a template can be refactored to an application
# by extracting it as a method.
# In JSP, this is difficult especially for a code fragment of an iteration.
#
# Also HTree encourages to separate business logic (Ruby code in an application)
# and presentation logic (Ruby code in a template).
# For example, presentation logic to color table rows stripe
# can be embedded in a template.
# It doesn't need to tangle an application.
#
module HTree
# :stopdoc:
# Plain object whose only job is to supply a binding with no local
# variables; its empty_binding singleton method is defined right after
# this module body.
EmptyBindingObject = Object.new
# :startdoc:
end
# :stopdoc:
# The singleton method is defined through a local variable because
# "def" needs a simple receiver expression.
htree_emptybindingobject = HTree::EmptyBindingObject
# Returns a binding whose self is HTree::EmptyBindingObject and which has
# no local variables of its own; used as the scope for evaluating
# generated template code.
def htree_emptybindingobject.empty_binding
binding
end
# :startdoc:
require 'htree/parse'
require 'htree/gencode'
require 'htree/equality'
require 'htree/traverse'
# call-seq:
# HTree.expand_template(template_pathname, obj=Object.new, out=$stdout, encoding=internal_encoding) -> out
# HTree.expand_template(out=$stdout, encoding=internal_encoding) { template_string } -> out
#
# HTree.expand_template expands a template.
#
# The arguments should be specified as follows.
# All argument except pathname are optional.
#
# - HTree.expand_template(pathname , obj , out , encoding ) -> out
# - HTree.expand_template(out , encoding ) {template_string } -> out
#
# The template is specified by a file or a string.
# If a block is not given, the first argument represent a template pathname.
# Otherwise, the block is yielded and its value is interpreted as a template
# string.
# So it can be called as follows in simplest case.
#
# - HTree.expand_template(template_pathname )
# - HTree.expand_template{template_string }
#
# Ruby expressions in the template file specified by _template_pathname_ are
# evaluated in the context of the optional second argument obj as follows.
# I.e. the pseudo variable self in the expressions is bound to obj .
#
# HTree.expand_template(template_pathname, obj)
#
# Ruby expressions in the template_string are evaluated
# in the context of the caller of HTree.expand_template.
# (binding information is specified by the block.)
# I.e. they can access local variables etc.
# We recommend to specify template_string as a literal string without
# interpolation because dynamically generated string may break lexical scope.
#
# HTree.expand_template has two more optional arguments:
# out , encoding .
#
# out specifies output target.
# It should have << method: IO and String for example.
# If it is not specified, $stdout is used.
# If it has a method charset= , it is called to set the minimal charset
# of the result before << is called.
#
# encoding specifies output character encoding.
# If it is not specified, internal encoding is used.
#
# HTree.expand_template returns out or $stdout if out is not
# specified.
#
def HTree.expand_template(*args, &block)
  # With a block: the block's value is the template string and its binding
  # is the evaluation scope.
  # Without a block: args[0] is a pathname (or an object responding to
  # read), args[1] an optional object that becomes self for the template
  # expressions. The remaining optional arguments are out and encoding.
  #
  # Raises ArgumentError when the pathname is missing or too many
  # arguments are given. Returns out.
  if block
    template = block.call
    binding = block.binding
  else
    pathname = args.fetch(0) { raise ArgumentError, "pathname not given" }
    args.shift
    obj = args.fetch(0) { Object.new }
    args.shift
    if pathname.respond_to? :read
      template = pathname.read
      # Object#untaint no longer exists on recent Ruby versions; only call
      # it where it is still available.
      template = template.untaint if template.respond_to? :untaint
      if template.respond_to? :charset
        if template.respond_to? :encode
          template = template.encode(HTree::Encoder.internal_charset, template.charset)
        else
          template = Iconv.conv(HTree::Encoder.internal_charset, template.charset, template)
        end
      end
    else
      template = File.read(pathname)
      template = template.untaint if template.respond_to? :untaint
    end
    # obj is handed to the eval'ed code through a thread-local because the
    # code runs in an empty binding that cannot see local variables here.
    Thread.current[:htree_expand_template_obj] = obj
    begin
      binding = eval(<<-'End',
Thread.current[:htree_expand_template_obj].class.class_eval <<-'EE'
Thread.current[:htree_expand_template_obj].instance_eval { binding }
EE
End
        HTree::EmptyBindingObject.empty_binding, "(eval:#{__FILE__}:#{__LINE__})")
    ensure
      # Always drop the reference, even when the eval above raises.
      Thread.current[:htree_expand_template_obj] = nil
    end
  end
  out = args.shift || $stdout
  encoding = args.shift || HTree::Encoder.internal_charset
  if !args.empty?
    raise ArgumentError, "wrong number of arguments"
  end
  HTree::TemplateCompiler.new.expand_template(template, out, encoding, binding)
end
# call-seq:
# HTree(html_string) -> doc
# HTree{template_string} -> doc
#
# HTree(html_string) parses html_string.
# HTree{template_string} parses template_string and expands it as a template.
# Ruby expressions in template_string is evaluated in the scope of the caller.
#
# HTree() and HTree{} return a tree as an instance of HTree::Doc.
def HTree(html_string=nil, &block)
  # Without a block the argument is parsed directly.
  return HTree.parse(html_string) unless block_given?

  # With a block the block's value is a template: it is expanded in the
  # block's own binding into a fresh string, which is then parsed.
  raise ArgumentError, "both argument and block given." if html_string
  template = block.call
  compiler = HTree::TemplateCompiler.new
  expanded = compiler.expand_template(template, '', HTree::Encoder.internal_charset, block.binding)
  HTree.parse(expanded)
end
# call-seq:
# HTree.compile_template(template_string) -> module
#
# HTree.compile_template(template_string) compiles
# template_string as a template.
#
# HTree.compile_template returns a module.
# The module has module functions for each templates defined in
# template_string .
# The returned module can be used for +include+.
#
# M = HTree.compile_template(<<'End')
#
# 's birthday is .
#
# End
# M.birthday('Ruby', Time.utc(1993, 2, 24)).display_xml
# # Ruby's birthday is February 24th 1993.
#
# The module function takes arguments specifies by a _template
# attribute and returns a tree represented as HTree::Node.
#
def HTree.compile_template(template_string)
  # Generates Ruby source for the templates found in template_string and
  # evaluates it in an empty binding. Returns the value of that
  # evaluation (the generated code builds a Module and ends with self).
  code = HTree::TemplateCompiler.new.compile_template(template_string)
  # The code is handed over through a thread-local because the empty
  # binding cannot see local variables of this method.
  Thread.current[:htree_compile_template_code] = code
  begin
    eval(<<-'End',
eval(Thread.current[:htree_compile_template_code])
End
      HTree::EmptyBindingObject.empty_binding, "(eval:#{__FILE__}:#{__LINE__})")
  ensure
    # Always drop the reference, even when the generated code fails to
    # evaluate.
    Thread.current[:htree_compile_template_code] = nil
  end
end
# :stopdoc:
class HTree::TemplateCompiler
# Element names (plain and XHTML-qualified span/div) whose tags may be
# dropped from the output when the element has no attributes left.
IGNORABLE_ELEMENTS = {
'span' => true,
'div' => true,
'{http://www.w3.org/1999/xhtml}span' => true,
'{http://www.w3.org/1999/xhtml}div' => true,
}
# Starts the gensym counter at zero, so the first generated name is "g1...".
def initialize
@gensym_id = 0
end
def gensym(suffix='')
  # Returns a name of the form "g<N><suffix>", where N is a counter that
  # is advanced on every call, so names from one compiler never repeat.
  serial = @gensym_id + 1
  @gensym_id = serial
  "g#{serial}#{suffix}"
end
# Parses the template source with HTree.parse and removes
# whitespace-only text nodes via strip_whitespaces.
def parse_template(template)
strip_whitespaces(HTree.parse(template))
end
# Elements that strip_whitespaces returns untouched (XHTML pre).
WhiteSpacePreservingElements = {
'{http://www.w3.org/1999/xhtml}pre' => true
}
def strip_whitespaces(template)
  # Returns +template+ with text nodes consisting only of space, tab, CR
  # and LF removed, recursively.
  #
  # - Doc: rebuilt from its stripped children.
  # - Elem: children are stripped through subst_subnode, except for
  #   elements listed in WhiteSpacePreservingElements, which are returned
  #   unchanged.
  # - Text: nil when whitespace-only, otherwise the node itself.
  # - anything else: returned unchanged.
  case template
  when HTree::Doc
    HTree::Doc.new(*template.children.map {|c| strip_whitespaces(c) }.compact)
  when HTree::Elem
    return template if WhiteSpacePreservingElements[template.name]
    subst = {}
    template.children.each_with_index {|c, i|
      # A nil value tells subst_subnode to drop that child.
      subst[i] = strip_whitespaces(c)
    }
    template.subst_subnode(subst)
  when HTree::Text
    /\A[ \t\r\n]*\z/ =~ template.rcdata ? nil : template
  else
    template
  end
end
def template_is_html(template)
  # Scans the top-level children in order: an XML declaration decides
  # "not HTML", an element in the XHTML namespace decides "HTML".
  # Returns false when neither is found.
  template.each_child do |child|
    return false if child.xmldecl?
    next unless child.elem?
    return true if child.element_name.namespace_uri == 'http://www.w3.org/1999/xhtml'
  end
  false
end
# Expands the template source +template+ and appends the result to +out+.
#
# The template is parsed, turned into Ruby code that writes into an
# HTree::Encoder created for +encoding+, and that code is evaluated in
# +binding+. If +out+ responds to charset=, it receives the encoder's
# minimal charset before the result is appended with <<.
# Returns +out+.
def expand_template(template, out, encoding, binding)
template = parse_template(template)
is_html = template_is_html(template)
# Generated local variable names, chosen via gensym so they are unique
# within this compiler.
outvar = gensym('out')
contextvar = gensym('top_context')
code = ''
code << "#{outvar} = HTree::Encoder.new(#{encoding.dump})\n"
code << "#{outvar}.html_output = true\n" if is_html
code << "#{contextvar} = #{is_html ? "HTree::HTMLContext" : "HTree::DefaultContext"}\n"
code << compile_body(outvar, contextvar, template, false)
# The generated code evaluates to [output string, minimal charset].
code << "[#{outvar}.#{is_html ? "finish" : "finish_with_xmldecl"}, #{outvar}.minimal_charset]\n"
#puts code; STDOUT.flush
result, minimal_charset = eval(code, binding, "(eval:#{__FILE__}:#{__LINE__})")
out.charset = minimal_charset if out.respond_to? :charset=
out << result
out
end
def compile_template(src)
  # Returns Ruby source text that, when evaluated, builds an anonymous
  # module holding one set of module functions per outermost _template
  # definition found in +src+.
  templates = []
  extract_templates(parse_template(src), templates, true)
  method_defs = templates.map {|name_args, node|
    compile_global_template(name_args, node)
  }
  <<"End"
require 'htree/encoder'
require 'htree/context'
Module.new.module_eval <<'EE'
module_function
#{method_defs.join('').chomp}
self
EE
End
end
# Truthy when the local name of +name+ begins with an underscore, which
# marks a template directive. The value is the regexp match result
# (0 or nil), not a strict boolean.
def template_attribute?(name)
/\A_/ =~ name.local_name
end
def extract_templates(node, templates, is_toplevel)
  # Removes _template definitions from the tree below +node+.
  #
  # Every element carrying a _template attribute is appended to
  # +templates+ as [declaration string, element without that attribute]
  # and replaced by nil. Elements with other template attributes are left
  # alone, unless +is_toplevel+ is true, in which case HTree::Error is
  # raised. Returns the rewritten node.
  rebuild_children = lambda {
    subst = {}
    node.children.each_with_index {|child, i|
      subst[i] = extract_templates(child, templates, is_toplevel)
    }
    node.subst_subnode(subst)
  }

  case node
  when HTree::Doc
    rebuild_children.call
  when HTree::Elem
    ht_attrs = node.attributes.partition {|name, text| template_attribute? name }.first
    return rebuild_children.call if ht_attrs.empty?

    definition = ht_attrs.find {|htname, text| htname.universal_name == '_template' }
    if definition
      name_fargs = definition[1].to_s
      templates << [name_fargs, node.subst_subnode('_template' => nil)]
      return nil
    end

    if is_toplevel
      raise HTree::Error, "unexpected template attributes in toplevel: #{ht_attrs.inspect}"
    end
    node
  else
    node
  end
end
# Identifier accepted in template declarations and calls: a lowercase
# letter followed by lowercase letters, digits or underscores.
ID_PAT = /[a-z][a-z0-9_]*/
# A template declaration: name, optionally followed by a parenthesized,
# comma-separated (possibly empty) list of identifiers.
# Capture 1 is the name, capture 2 the argument list text.
NAME_FARGS_PAT = /(#{ID_PAT})(?:\(\s*(|#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)\))?/
# Generates Ruby source for one outermost template definition.
#
# +name_fargs+ is the declaration text ("name" or "name(a, b)") and
# +node+ the template body. Three methods are generated:
# - name: parses the output of _xml_name with HTree.parse
# - _xml_name: creates the encoder and context, runs _ht_name and
#   returns the encoder's finished output
# - _ht_name: the compiled body, taking the output and context
#   variables before the declared arguments (made public)
#
# Raises HTree::Error when the declaration does not match NAME_FARGS_PAT.
def compile_global_template(name_fargs, node)
unless /\A#{NAME_FARGS_PAT}\z/o =~ name_fargs
raise HTree::Error, "invalid template declaration: #{name_fargs}"
end
name = $1
# $2 is nil when the declaration has no parenthesized argument list.
fargs = $2 ? $2.scan(ID_PAT) : []
outvar = gensym('out')
contextvar = gensym('top_context')
args2 = [outvar, contextvar, *fargs]
<<"End"
def #{name}(#{fargs.join(',')})
HTree.parse(_xml_#{name}(#{fargs.join(',')}))
end
def _xml_#{name}(#{fargs.join(',')})
#{outvar} = HTree::Encoder.new(HTree::Encoder.internal_charset)
#{contextvar} = HTree::DefaultContext
_ht_#{name}(#{args2.join(',')})
#{outvar}.finish
end
def _ht_#{name}(#{args2.join(',')})
#{compile_body(outvar, contextvar, node, false)}\
end
public :_ht_#{name}
End
end
# Generates Ruby source that assigns a lambda implementing a local
# (nested) template to a local variable named after the template.
# The lambda takes the output and context variables before the declared
# arguments.
#
# Raises HTree::Error when the declaration does not match NAME_FARGS_PAT.
def compile_local_template(name_fargs, node, local_templates)
unless /\A#{NAME_FARGS_PAT}\z/o =~ name_fargs
raise HTree::Error, "invalid template declaration: #{name_fargs}"
end
name = $1
# $2 is nil when the declaration has no parenthesized argument list.
fargs = $2 ? $2.scan(ID_PAT) : []
outvar = gensym('out')
contextvar = gensym('top_context')
args2 = [outvar, contextvar, *fargs]
<<"End"
#{name} = lambda {|#{args2.join(',')}|
#{compile_body(outvar, contextvar, node, false, local_templates)}\
}
End
end
def compile_body(outvar, contextvar, node, is_toplevel, local_templates={})
  # Compiles +node+ into Ruby code that writes to the encoder named
  # +outvar+ using the context named +contextvar+.
  # An attribute-less element listed in IGNORABLE_ELEMENTS contributes
  # only its children. +is_toplevel+ is accepted but not used here.
  tag_droppable = node.elem? && IGNORABLE_ELEMENTS[node.name] && node.attributes.empty?
  wrapped = TemplateNode.new(tag_droppable ? node.children : node)
  logic_node = generate_logic_node([:content], wrapped, local_templates)
  logic_node.generate_xml_output_code(outvar, contextvar)
end
# Compiles one parsed template node.
#
# - Doc: its children are compiled and wrapped in a TemplateNode.
# - Elem: the template attributes (names starting with "_") select the
#   logic to generate. _attr_* attributes are turned into dynamic
#   attributes; the remaining directive must be one of _text, _tree, _if
#   (optionally with _else), _call, _iter or _iter_content.
# - anything else: returned unchanged.
#
# Raises HTree::Error for an unsupported combination of directives or a
# malformed _text/_tree element.
def compile_node(node, local_templates)
case node
when HTree::Doc
TemplateNode.new(node.children.map {|n| compile_node(n, local_templates) })
when HTree::Elem
ht_attrs = node.attributes.find_all {|name, text| template_attribute? name }
# Sorted so that the combinations matched below have a fixed order
# (e.g. ['_else', '_if']).
ht_attrs = ht_attrs.sort_by {|htname, text| htname.universal_name }
ignore_tag = false
unless ht_attrs.empty?
attr_mod = {}
ht_attrs.each {|htname, text|
# Every template attribute is removed from the element ...
attr_mod[htname] = nil
if /\A_attr_/ =~ htname.local_name
# ... and _attr_NAME is re-added as a dynamic attribute called NAME
# ($' is the part of the local name after "_attr_").
attr_mod[TemplateAttrName.new(htname.namespace_prefix, htname.namespace_uri, $')] = text
end
}
ht_attrs.reject! {|htname, text| /\A_attr_/ =~ htname.local_name }
node = node.subst_subnode(attr_mod)
# The tag itself can be dropped for span/div without remaining attributes.
ignore_tag = IGNORABLE_ELEMENTS[node.name] && node.attributes.empty?
end
ht_names = ht_attrs.map {|htname, text| htname.universal_name }
ht_vals = ht_attrs.map {|htname, text| text.to_s }
case ht_names
when []
generate_logic_node([:tag, [:content]], node, local_templates)
when ['_text'] # <elem _text="expr"> or <elem _text>expr</elem>
if ht_vals[0] != '_text' # xxx: attribute value is really omitted?
expr = ht_vals[0]
else
# The expression is the single text child of the element.
children = node.children
if children.length != 1
raise HTree::Error, "_text expression has #{children.length} nodes"
end
if !children[0].text?
raise HTree::Error, "_text expression is not text: #{children[0].class}"
end
expr = children[0].to_s
end
if ignore_tag && /\A\s*'((?:[^'\\]|\\.)*)'\s*\z/m =~ expr
# if expr is just a constant string literal, use it as a literal text.
# This saves dynamic evaluation of the expression.
# xxx: handle "..." as well if it has no #{}.
HTree::Text.new($1.gsub(/\\(.)/m, '\1'))
else
generate_logic_node(compile_dynamic_text(ignore_tag, expr), node, local_templates)
end
when ['_tree'] # <elem _tree="expr"> or <elem _tree>expr</elem>
if ht_vals[0] != '_tree' # xxx: attribute value is really omitted?
expr = ht_vals[0]
else
# The expression is the single text child of the element.
children = node.children
if children.length != 1
raise HTree::Error, "_tree expression has #{children.length} nodes"
end
if !children[0].text?
raise HTree::Error, "_tree expression is not text: #{children[0].class}"
end
expr = children[0].to_s
end
generate_logic_node(compile_dynamic_tree(ignore_tag, expr), node, local_templates)
when ['_if'] # <elem _if="expr">...</elem>
generate_logic_node(compile_if(ignore_tag, ht_vals[0], nil), node, local_templates)
when ['_else', '_if'] # <elem _if="expr" _else="call">...</elem>
# Sorted order puts _else first, so ht_vals[1] is the _if expression.
generate_logic_node(compile_if(ignore_tag, ht_vals[1], ht_vals[0]), node, local_templates)
when ['_call'] # <elem _call="[recv.]meth[(args)]">
generate_logic_node(compile_call(ignore_tag, ht_vals[0]), node, local_templates)
when ['_iter'] # <elem _iter="call//vars">...</elem>
generate_logic_node(compile_iter(ignore_tag, ht_vals[0]), node, local_templates)
when ['_iter_content'] # <elem _iter_content="call//vars">...</elem>
generate_logic_node(compile_iter_content(ignore_tag, ht_vals[0]), node, local_templates)
else
raise HTree::Error, "unexpected template attributes: #{ht_attrs.inspect}"
end
else
return node
end
end
def valid_syntax?(code)
  # Checks that +code+ parses as Ruby without running it: the BEGIN block
  # placed in front returns before any of +code+ is executed.
  # Raises SyntaxError ("invalid code: ...") when +code+ does not parse.
  #
  # Object#untaint no longer exists on recent Ruby versions, so it is
  # only called where it is still available.
  code = code.untaint if code.respond_to?(:untaint)
  begin
    eval("BEGIN {return true}\n#{code}")
  rescue SyntaxError
    raise SyntaxError, "invalid code: #{code}"
  end
end
def check_syntax(code)
  # Raises HTree::Error when valid_syntax? returns a false value for +code+.
  raise HTree::Error, "invalid ruby code: #{code}" unless valid_syntax?(code)
end
def compile_dynamic_text(ignore_tag, expr)
  # Logic for a _text directive: [:text, expr], wrapped in [:tag, ...]
  # unless the element's own tag is to be dropped.
  check_syntax(expr)
  text_logic = [:text, expr]
  ignore_tag ? text_logic : [:tag, text_logic]
end
def compile_dynamic_tree(ignore_tag, expr)
  # Logic for a _tree directive: [:tree, expr], wrapped in [:tag, ...]
  # unless the element's own tag is to be dropped.
  check_syntax(expr)
  tree_logic = [:tree, expr]
  ignore_tag ? tree_logic : [:tag, tree_logic]
end
def compile_if(ignore_tag, expr, else_call)
  # Logic for an _if directive: [:if, expr, then_logic, else_logic].
  # The then-branch is the element content, wrapped in its tag unless the
  # tag is to be dropped. The else-branch is a template call compiled
  # from +else_call+, or nil when no _else was given.
  check_syntax(expr)
  then_logic = ignore_tag ? [:content] : [:tag, [:content]]
  else_logic = else_call ? compile_call(true, else_call) : nil
  [:if, expr, then_logic, else_logic]
end
def split_args(spec)
  # Splits "head(args)" into ["head", "args"] by scanning backwards from
  # the final ")" to its matching "(". A spec that does not end with ")"
  # is returned as [spec, ''].
  # Raises HTree::Error when the final ")" has no matching "(".
  return spec, '' unless /\)\z/ =~ spec
  pos = spec.length - 1
  depth = 0
  loop do
    raise HTree::Error, "unmatched paren: #{spec}" if pos < 0
    ch = spec[pos]
    if ch == ?\)
      depth += 1
    elsif ch == ?\(
      depth -= 1
    end
    pos -= 1
    break if depth == 0
  end
  # pos stopped one character before the matching "(".
  open_at = pos + 1
  return spec[0, open_at], spec[(open_at + 1)...-1]
end
def compile_call(ignore_tag, spec)
  # spec : [recv.]meth[(args)]
  #
  # Returns [:call, recv, meth, args], where recv is nil when no receiver
  # was given and args is the text between the outer parentheses.
  # Raises HTree::Error when spec does not have that shape.
  # +ignore_tag+ is accepted but not used here.
  target, args = split_args(spec.strip)
  name_match = /#{ID_PAT}\z/o.match(target)
  raise HTree::Error, "invalid _call: #{target}" unless name_match
  meth = name_match[0]
  # Whatever precedes the method name must be blank or "recv.".
  prefix = name_match.pre_match
  if /\A\s*\z/ =~ prefix
    recv = nil
  elsif (recv_match = /\A\s*(.*)\.\z/.match(prefix))
    recv = recv_match[1]
  else
    raise HTree::Error, "invalid _call: #{prefix}"
  end
  if recv
    check_syntax(recv)
    check_syntax("#{recv}.#{meth}(#{args})")
  end
  check_syntax("#{meth}(#{args})")
  [:call, recv, meth, args]
end
def compile_iter(ignore_tag, spec)
  # spec: call//vars
  #
  # Logic for an _iter directive: the whole element (tag included, unless
  # the tag is to be dropped) is repeated for each block invocation.
  # Returns [:iter, call, vars, logic].
  # Raises HTree::Error when spec has no trailing "//vars" part.
  spec = spec.strip
  tail = %r{\s*//\s*(#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)?\z}o.match(spec)
  raise HTree::Error, "invalid block arguments for _iter: #{spec}" unless tail
  call = tail.pre_match.strip
  fargs = tail[1] ? tail[1].strip : ''
  check_syntax("#{call} {|#{fargs}| }")
  repeated = ignore_tag ? [:content] : [:tag, [:content]]
  [:iter, call, fargs, repeated]
end
def compile_iter_content(ignore_tag, spec)
  # spec: call//vars
  #
  # Logic for an _iter_content directive: only the element's content is
  # repeated; the tag (unless it is to be dropped) wraps the whole loop.
  # Returns [:iter, call, vars, [:content]], wrapped in [:tag, ...] when
  # the tag is kept.
  # Raises HTree::Error when spec has no trailing "//vars" part.
  spec = spec.strip
  unless %r{\s*//\s*(#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)?\z}o =~ spec
    # Name the directive that was actually used, so the message points at
    # the right attribute.
    raise HTree::Error, "invalid block arguments for _iter_content: #{spec}"
  end
  call = $`.strip
  fargs = $1 ? $1.strip : ''
  check_syntax("#{call} {|#{fargs}| }")
  logic = [:iter, call, fargs, [:content]]
  logic = [:tag, logic] unless ignore_tag
  logic
end
# Turns a compiled logic tuple into output nodes for +node+.
#
# logic ::= [:if, expr, then_logic, else_logic]
#         | [:iter, call, fargs, logic]
#         | [:tag, logic]
#         | [:text, expr]
#         | [:tree, expr]
#         | [:call, expr, meth, args]
#         | [:content]
#         | [:empty]
#
# +local_templates+ is a hash keyed by the names of templates that are
# in scope as local variables of the generated code; it is never
# modified (the :content branch works on a copy).
#
# Returns nil for [:empty], the (possibly substituted) +node+ for :tag,
# and a TemplateNode otherwise.  An unknown tuple raises Exception with
# a "[bug]" message.
def generate_logic_node(logic, node, local_templates)
  kind = logic.first
  case kind
  when :empty
    nil
  when :content
    extracted = []
    stripped_children = node.children.map do |child|
      extract_templates(child, extracted, false)
    end
    if extracted.empty?
      compiled = node.children.map {|child| compile_node(child, local_templates) }
      TemplateNode.new(compiled)
    else
      # Nested templates become local variables of the generated code:
      # first one declaration line ("a = b = nil"), then one definition
      # per template, then the remaining children.
      scope = local_templates.dup
      decl = ''
      extracted.each do |name_args, _sub_node|
        name = name_args[ID_PAT]
        scope[name] = name
        decl << "#{name} = "
      end
      decl << "nil\n"
      defs = extracted.map do |name_args, sub_node|
        lambda {|out, context|
          out.output_logic_line compile_local_template(name_args, sub_node, scope)
        }
      end
      declare = lambda {|out, context| out.output_logic_line decl }
      compiled = stripped_children.map {|child| compile_node(child, scope) }
      TemplateNode.new(declare, defs, compiled)
    end
  when :text
    expr = logic[1]
    TemplateNode.new(lambda {|out, context| out.output_dynamic_text expr })
  when :tree
    expr = logic[1]
    emit_tree = lambda {|out, context|
      out.output_dynamic_tree expr, make_context_expr(out, context)
    }
    TemplateNode.new(emit_tree)
  when :tag
    inner = logic[1]
    # An empty element with static content is emitted unchanged.
    return node if inner == [:content] && node.empty_element?
    # Drop every existing child and put the generated content at index 0.
    replacement = {}
    node.children.each_index {|idx| replacement[idx] = nil }
    replacement[0] = TemplateNode.new(generate_logic_node(inner, node, local_templates))
    node.subst_subnode(replacement)
  when :if
    expr, then_logic, else_logic = logic[1], logic[2], logic[3]
    parts = [
      lambda {|out, context| out.output_logic_line "if (#{expr})" },
      generate_logic_node(then_logic, node, local_templates)
    ]
    if else_logic
      parts << lambda {|out, context| out.output_logic_line "else" }
      parts << generate_logic_node(else_logic, node, local_templates)
    end
    parts << lambda {|out, context| out.output_logic_line "end" }
    TemplateNode.new(*parts)
  when :iter
    call, fargs, body_logic = logic[1], logic[2], logic[3]
    opener = lambda {|out, context| out.output_logic_line "#{call} {|#{fargs}|" }
    body = generate_logic_node(body_logic, node, local_templates)
    closer = lambda {|out, context| out.output_logic_line "}" }
    TemplateNode.new(opener, body, closer)
  when :call
    recv, meth, args = logic[1], logic[2], logic[3]
    emit_call = lambda {|out, context|
      arglist = [out.outvar, ", ", make_context_expr(out, context)]
      arglist << ", " << args unless args.empty?
      joined = arglist.join('')
      line =
        if recv
          "(#{recv})._ht_#{meth}(#{joined})"
        elsif local_templates.include? meth
          # A template defined in an enclosing scope is a local variable.
          "#{meth}.call(#{joined})"
        else
          "_ht_#{meth}(#{joined})"
        end
      out.output_logic_line line
    }
    TemplateNode.new(emit_call)
  else
    raise Exception, "[bug] invalid logic: #{logic.inspect}"
  end
end
# Returns the source text of an expression for the context at this
# point of the template.
#
# Namespace bindings of +context+ that equal the entry of
# HTree::Context::DefaultNamespaces for the same prefix are ignored.
# If nothing is left the result is simply out.contextvar; otherwise it
# is "<contextvar>.subst_namespaces(prefix=>uri, ...)", with each prefix
# and URI rendered through String#dump (a nil prefix is written as nil).
def make_context_expr(out, context)
  extra = context.namespaces.reject do |prefix, uri|
    HTree::Context::DefaultNamespaces[prefix] == uri
  end
  ctx_var = out.contextvar
  return ctx_var if extra.empty?
  bindings = extra.map do |prefix, uri|
    "#{prefix ? prefix.dump : 'nil'}=>#{uri.dump}"
  end
  "#{ctx_var}.subst_namespaces(#{bindings.join(', ')})"
end
# A node made of output steps produced by the template compiler.
#
# Each child is either something callable, invoked as
# child.call(out, context), or an object that is asked to
# output(out, context) itself.
class TemplateNode
  include HTree::Node

  attr_reader :children

  # Nested arrays are flattened and nil entries dropped, so callers can
  # pass optional parts and lists of parts directly.
  def initialize(*children)
    @children = children.flatten.compact
  end

  # Runs every child in order against +out+.
  def output(out, context)
    @children.each do |child|
      if child.respond_to?(:call)
        child.call(out, context)
      else
        child.output(out, context)
      end
    end
  end
end
# An attribute name whose value is produced at template run time.
class TemplateAttrName < HTree::Name
  # Writes the name, then ="...", with the value sent through
  # out.output_dynamic_attvalue instead of being written literally.
  def output_attribute(text, out, context)
    output(out, context)
    out.output_string('="')
    out.output_dynamic_attvalue(text.to_s)
    out.output_string('"')
  end
end
end
# :startdoc:
htree-0.8/htree/rexml.rb 0000644 0001750 0001750 00000006242 11747021106 014264 0 ustar jonas jonas # = REXML Tree Generator
#
# HTree::Node#to_rexml is used for converting HTree to REXML.
#
# == Method Summary
#
# - HTree::Node#to_rexml -> REXML::Child
#
# == Example
#
# HTree.parse(...).to_rexml #=> REXML::Document
#
# == Comparison between HTree and REXML.
#
# - The HTree parser is a permissive HTML/XML parser.
# The REXML parser is a strict XML parser.
# HTree is recommended if you need to parse real-world HTML.
# REXML is recommended if you need strict error checking.
# - HTree objects are immutable.
# REXML objects are mutable.
# REXML should be used if you need to modify the tree.
#
require 'htree/modules'
require 'htree/output' # HTree::DocType#generate_content
module HTree
module Node
  # Converts this node to a REXML tree.
  #
  # REXML is required here, on first use, rather than when the file is
  # loaded.  The conversion starts without a parent and with
  # DefaultContext as the namespace context.
  def to_rexml
    require 'rexml/document'
    no_parent = nil
    to_rexml_internal(no_parent, DefaultContext)
  end
end
# :stopdoc:
class Doc
  # Builds a REXML::Document and converts every child into it.
  #
  # A document is always the root, so +parent+ must be nil;
  # anything else raises ArgumentError.
  def to_rexml_internal(parent, context)
    raise ArgumentError, "parent must be nil" unless parent.nil?
    document = REXML::Document.new
    children.each do |child|
      child.to_rexml_internal(document, context)
    end
    document
  end
end
class Elem
  # Builds a REXML::Element under +parent+ for this element, its
  # attributes and its children.
  #
  # A namespace declaration is added for the element's prefix, and for
  # each prefixed attribute, whenever +context+ does not already map
  # that prefix to the same URI.  Children are converted with a context
  # extended by those declarations.
  def to_rexml_internal(parent, context)
    ename = self.element_name
    elem_prefix = ename.namespace_prefix
    pending_ns = {}
    if context.namespace_uri(elem_prefix) != ename.namespace_uri
      pending_ns[elem_prefix] = ename.namespace_uri
    end
    qname = elem_prefix ? "#{elem_prefix}:#{ename.local_name}" : ename.local_name
    result = REXML::Element.new(qname, parent)
    each_attribute do |aname, atext|
      attr_prefix = aname.namespace_prefix
      if attr_prefix
        if context.namespace_uri(attr_prefix) != aname.namespace_uri
          pending_ns[attr_prefix] = aname.namespace_uri
        end
        result.add_attribute("#{attr_prefix}:#{aname.local_name}", atext.to_s)
      else
        # Unprefixed attributes never trigger a namespace declaration.
        result.add_attribute(aname.local_name, atext.to_s)
      end
    end
    pending_ns.each do |ns_prefix, uri|
      if ns_prefix
        result.add_namespace(ns_prefix, uri)
      else
        # A nil prefix stands for the default namespace.
        result.add_namespace(uri)
      end
    end
    child_context = context.subst_namespaces(pending_ns)
    children.each do |child|
      child.to_rexml_internal(result, child_context)
    end
    result
  end
end
class Text
  # Builds a REXML::Text under +parent+ from this text's rcdata.
  #
  # Any "<" or ">" in the rcdata is first replaced by its
  # Encoder::ChRef entry.  The text is passed to REXML with both flag
  # arguments set to true (respect_whitespace and raw, per REXML's
  # documented parameter order).
  def to_rexml_internal(parent, context)
    escaped = rcdata.gsub(/[<>]/) {|bracket| Encoder::ChRef[bracket] }
    REXML::Text.new(escaped, true, parent, true)
  end
end
class XMLDecl
  # Builds a REXML::XMLDecl from this declaration's version, encoding
  # and standalone values, appends it to +parent+ when one is given,
  # and returns it.
  def to_rexml_internal(parent, context)
    decl = REXML::XMLDecl.new(version, encoding, standalone)
    parent << decl if parent
    decl
  end
end
class DocType
  # Builds a REXML::DocType under +parent+ from a two-element array:
  # the root element name and the text returned by generate_content.
  def to_rexml_internal(parent, context)
    parts = [root_element_name, generate_content]
    REXML::DocType.new(parts, parent)
  end
end
class ProcIns
  # Builds a REXML::Instruction from this processing instruction's
  # target and content, appends it to +parent+ when one is given, and
  # returns it.
  def to_rexml_internal(parent, context)
    instruction = REXML::Instruction.new(target, content)
    parent << instruction if parent
    instruction
  end
end
class Comment
  # Builds a REXML::Comment under +parent+ holding this comment's content.
  def to_rexml_internal(parent, context)
    body = content
    REXML::Comment.new(body, parent)
  end
end
class BogusETag
  # A bogus end tag produces nothing in the REXML tree: nothing is
  # added to +parent+ and the result is always nil.
  def to_rexml_internal(parent, context)
    return nil
  end
end
# :startdoc:
end
htree-0.8/htree/modules.rb 0000644 0001750 0001750 00000004074 11747021106 014606 0 ustar jonas jonas module HTree
# This section only declares the class/module skeleton of the library;
# each line (re)opens a class or module and mixes something in.  The order
# matters: a module has to exist before it can be included or nested under.
#
# Name and Context mix in the HTree module itself.
class Name; include HTree end
class Context; include HTree end
# :stopdoc:
# Start and end tags share the Tag module.
module Tag; include HTree end
class STag; include Tag end
class ETag; include Tag end
# :startdoc:
# Node is mixed into Container and Leaf; Doc and Elem are Containers.
module Node; include HTree end
module Container; include Node end
class Doc; include Container end
class Elem; include Container end
# Every remaining node class is a Leaf.
module Leaf; include Node end
class Text; include Leaf end
class XMLDecl; include Leaf end
class DocType; include Leaf end
class ProcIns; include Leaf end
class Comment; include Leaf end
class BogusETag; include Leaf end
# Traverse is the base of the Trav modules: Container::Trav and Leaf::Trav
# include it, and each node class gets its own nested Trav module that
# includes the matching one and is mixed into the class itself.
module Traverse end
module Container::Trav; include Traverse end
module Leaf::Trav; include Traverse end
class Doc; module Trav; include Container::Trav end; include Trav end
class Elem; module Trav; include Container::Trav end; include Trav end
class Text; module Trav; include Leaf::Trav end; include Trav end
class XMLDecl; module Trav; include Leaf::Trav end; include Trav end
class DocType; module Trav; include Leaf::Trav end; include Trav end
class ProcIns; module Trav; include Leaf::Trav end; include Trav end
class Comment; module Trav; include Leaf::Trav end; include Trav end
class BogusETag; module Trav; include Leaf::Trav end; include Trav end
# Each node class also gets a nested Loc class: a Location subclass that
# includes that class's Trav module plus Container::Loc or Leaf::Loc.
class Location; include HTree end
module Container::Loc end
module Leaf::Loc end
class Doc; class Loc < Location; include Trav, Container::Loc end end
class Elem; class Loc < Location; include Trav, Container::Loc end end
class Text; class Loc < Location; include Trav, Leaf::Loc end end
class XMLDecl; class Loc < Location; include Trav, Leaf::Loc end end
class DocType; class Loc < Location; include Trav, Leaf::Loc end end
class ProcIns; class Loc < Location; include Trav, Leaf::Loc end end
class Comment; class Loc < Location; include Trav, Leaf::Loc end end
class BogusETag; class Loc < Location; include Trav, Leaf::Loc end end
# Error class of the library; a StandardError, so a bare rescue catches it.
class Error < StandardError; end
end
htree-0.8/htree/htmlinfo.rb 0000644 0001750 0001750 00000110566 11747021106 014762 0 ustar jonas jonas module HTree
# The code below is auto-generated. Don't edit manually.
# :stopdoc:
# Maps a character entity name (written without the surrounding "&" and
# ";") to an Integer, e.g. "amp"=>38 and "lt"=>60.  The values look like
# Unicode code points -- NOTE(review): confirm against the generator.
NamedCharacters =
{"AElig"=>198, "Aacute"=>193, "Acirc"=>194, "Agrave"=>192, "Alpha"=>913,
"Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199,
"Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201,
"Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203,
"Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921,
"Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925,
"OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937,
"Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934,
"Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931,
"THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219,
"Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221,
"Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180,
"aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38,
"and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776,
"atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166,
"bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162,
"chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169,
"crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224,
"darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247,
"eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195,
"ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240,
"euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704,
"frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947,
"ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829,
"hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236,
"image"=>8465, "infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191,
"isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955,
"lang"=>9001, "laquo"=>171, "larr"=>8592, "lceil"=>8968, "ldquo"=>8220,
"le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206,
"lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212,
"micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711,
"nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713,
"nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244,
"oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959,
"oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248,
"otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706,
"permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982,
"plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733,
"psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002,
"raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476,
"reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250,
"rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167,
"shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824,
"sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178,
"sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756,
"theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732,
"times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593,
"ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965,
"uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165,
"yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204}
# Matches a string that consists of exactly one entity name: the
# alternation is anchored with \A and \z and is case-sensitive.  The list
# appears to mirror the keys of NamedCharacters -- NOTE(review): confirm.
NamedCharactersPattern = /\A(?-mix:AElig|Aacute|Acirc|Agrave|Alpha|Aring|Atilde|Auml|Beta|Ccedil|Chi|Dagger|Delta|ETH|Eacute|Ecirc|Egrave|Epsilon|Eta|Euml|Gamma|Iacute|Icirc|Igrave|Iota|Iuml|Kappa|Lambda|Mu|Ntilde|Nu|OElig|Oacute|Ocirc|Ograve|Omega|Omicron|Oslash|Otilde|Ouml|Phi|Pi|Prime|Psi|Rho|Scaron|Sigma|THORN|Tau|Theta|Uacute|Ucirc|Ugrave|Upsilon|Uuml|Xi|Yacute|Yuml|Zeta|aacute|acirc|acute|aelig|agrave|alefsym|alpha|amp|and|ang|apos|aring|asymp|atilde|auml|bdquo|beta|brvbar|bull|cap|ccedil|cedil|cent|chi|circ|clubs|cong|copy|crarr|cup|curren|dArr|dagger|darr|deg|delta|diams|divide|eacute|ecirc|egrave|empty|emsp|ensp|epsilon|equiv|eta|eth|euml|euro|exist|fnof|forall|frac12|frac14|frac34|frasl|gamma|ge|gt|hArr|harr|hearts|hellip|iacute|icirc|iexcl|igrave|image|infin|int|iota|iquest|isin|iuml|kappa|lArr|lambda|lang|laquo|larr|lceil|ldquo|le|lfloor|lowast|loz|lrm|lsaquo|lsquo|lt|macr|mdash|micro|middot|minus|mu|nabla|nbsp|ndash|ne|ni|not|notin|nsub|ntilde|nu|oacute|ocirc|oelig|ograve|oline|omega|omicron|oplus|or|ordf|ordm|oslash|otilde|otimes|ouml|para|part|permil|perp|phi|pi|piv|plusmn|pound|prime|prod|prop|psi|quot|rArr|radic|rang|raquo|rarr|rceil|rdquo|real|reg|rfloor|rho|rlm|rsaquo|rsquo|sbquo|scaron|sdot|sect|shy|sigma|sigmaf|sim|spades|sub|sube|sum|sup|sup1|sup2|sup3|supe|szlig|tau|there4|theta|thetasym|thinsp|thorn|tilde|times|trade|uArr|uacute|uarr|ucirc|ugrave|uml|upsih|upsilon|uuml|weierp|xi|yacute|yen|yuml|zeta|zwj|zwnj)\z/
ElementContent =
{"h6"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"object"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q",
"s", "samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"dl"=>["dd", "dt"],
"p"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"acronym"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"code"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"ul"=>["li"],
"tt"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"label"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"form"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"q"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"thead"=>["tr"],
"area"=>:EMPTY,
"td"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"title"=>[],
"dir"=>["li"],
"s"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"ol"=>["li"],
"hr"=>:EMPTY,
"applet"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q",
"s", "samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"table"=>["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"],
"legend"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"cite"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"a"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"html"=>
["a", "abbr", "acronym", "address", "applet", "b", "base", "basefont", "bdo",
"big", "blockquote", "body", "br", "button", "center", "cite", "code",
"dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2",
"h3", "h4", "h5", "h6", "head", "hr", "i", "iframe", "img", "input",
"isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object",
"ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span",
"strike", "strong", "sub", "sup", "table", "textarea", "title", "tt", "u",
"ul", "var"],
"u"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"blockquote"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"center"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"b"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"base"=>:EMPTY,
"th"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"link"=>:EMPTY,
"var"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"samp"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"div"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"textarea"=>[],
"pre"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"head"=>["base", "isindex", "title"],
"span"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"br"=>:EMPTY,
"script"=>:CDATA,
"noframes"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"style"=>:CDATA,
"meta"=>:EMPTY,
"dt"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"option"=>[],
"kbd"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"big"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"tfoot"=>["tr"],
"sup"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"bdo"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"isindex"=>:EMPTY,
"dfn"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"fieldset"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "legend",
"map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"em"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"font"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"tbody"=>["tr"],
"noscript"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"li"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"col"=>:EMPTY,
"small"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"dd"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"i"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"menu"=>["li"],
"strong"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"basefont"=>:EMPTY,
"img"=>:EMPTY,
"optgroup"=>["option"],
"map"=>
["address", "area", "blockquote", "center", "dir", "div", "dl", "fieldset",
"form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu",
"noframes", "noscript", "ol", "p", "pre", "table", "ul"],
"h1"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"address"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "p", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"sub"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"param"=>:EMPTY,
"input"=>:EMPTY,
"h2"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"abbr"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"h3"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"strike"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"body"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"ins"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"button"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"h4"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"select"=>["optgroup", "option"],
"caption"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"colgroup"=>["col"],
"tr"=>["td", "th"],
"del"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"h5"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"iframe"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"]}
# Generated table: maps an element name to a list of element names.
# NOTE(review): judging by the constant name these look like the SGML
# inclusion exceptions of the HTML DTD (elements additionally permitted
# inside the keyed element) -- confirm against the generator, and mirror
# any comment change there because this section is auto-generated.
ElementInclusions =
{"head"=>["link", "meta", "object", "script", "style"], "body"=>["del", "ins"]}
# Generated table: maps an element name to a list of element names.
# NOTE(review): judging by the constant name these look like the SGML
# exclusion exceptions of the HTML DTD (elements that must not appear
# inside the keyed element, e.g. "a" inside "a") -- confirm against the
# generator, and mirror any comment change there because this section is
# auto-generated.
ElementExclusions =
{"button"=>
["a", "button", "fieldset", "form", "iframe", "input", "isindex", "label",
"select", "textarea"],
"a"=>["a"],
"dir"=>
["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form",
"h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes",
"noscript", "ol", "p", "pre", "table", "ul"],
"title"=>["link", "meta", "object", "script", "style"],
"pre"=>
["applet", "basefont", "big", "font", "img", "object", "small", "sub",
"sup"],
"form"=>["form"],
"menu"=>
["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form",
"h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes",
"noscript", "ol", "p", "pre", "table", "ul"],
"label"=>["label"]}
# Generated table: element name => { token => attribute name }.
# For each element it maps a bare token (e.g. "compact", "ltr", "left") to
# the name of an attribute (e.g. "compact", "dir", "align").
# NOTE(review): presumably used to recover the attribute name when markup
# gives only the value (SGML attribute minimization, as in <dl compact>) --
# confirm against the parser that consumes this table. This section is
# auto-generated, so mirror any comment change in the generator.
OmittedAttrName =
{"h6"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"object"=>
{"bottom"=>"align", "declare"=>"declare", "left"=>"align", "ltr"=>"dir",
"middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"},
"dl"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
"p"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"acronym"=>{"ltr"=>"dir", "rtl"=>"dir"},
"code"=>{"ltr"=>"dir", "rtl"=>"dir"},
"ul"=>
{"circle"=>"type", "compact"=>"compact", "disc"=>"type", "ltr"=>"dir",
"rtl"=>"dir", "square"=>"type"},
"tt"=>{"ltr"=>"dir", "rtl"=>"dir"},
"label"=>{"ltr"=>"dir", "rtl"=>"dir"},
"form"=>{"get"=>"method", "ltr"=>"dir", "post"=>"method", "rtl"=>"dir"},
"q"=>{"ltr"=>"dir", "rtl"=>"dir"},
"thead"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"area"=>
{"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "nohref"=>"nohref",
"poly"=>"shape", "rect"=>"shape", "rtl"=>"dir"},
"td"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align",
"left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap",
"right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir",
"top"=>"valign"},
"title"=>{"ltr"=>"dir", "rtl"=>"dir"},
"dir"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
"s"=>{"ltr"=>"dir", "rtl"=>"dir"},
"ol"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
"hr"=>
{"center"=>"align", "left"=>"align", "ltr"=>"dir", "noshade"=>"noshade",
"right"=>"align", "rtl"=>"dir"},
"applet"=>
{"bottom"=>"align", "left"=>"align", "middle"=>"align", "right"=>"align",
"top"=>"align"},
"table"=>
{"above"=>"frame", "all"=>"rules", "below"=>"frame", "border"=>"frame",
"box"=>"frame", "center"=>"align", "cols"=>"rules", "groups"=>"rules",
"hsides"=>"frame", "left"=>"align", "lhs"=>"frame", "ltr"=>"dir",
"none"=>"rules", "rhs"=>"frame", "right"=>"align", "rows"=>"rules",
"rtl"=>"dir", "void"=>"frame", "vsides"=>"frame"},
"legend"=>
{"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align",
"rtl"=>"dir", "top"=>"align"},
"cite"=>{"ltr"=>"dir", "rtl"=>"dir"},
"a"=>
{"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "poly"=>"shape",
"rect"=>"shape", "rtl"=>"dir"},
"html"=>{"ltr"=>"dir", "rtl"=>"dir"},
"u"=>{"ltr"=>"dir", "rtl"=>"dir"},
"blockquote"=>{"ltr"=>"dir", "rtl"=>"dir"},
"center"=>{"ltr"=>"dir", "rtl"=>"dir"},
"b"=>{"ltr"=>"dir", "rtl"=>"dir"},
"th"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align",
"left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap",
"right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir",
"top"=>"valign"},
"link"=>{"ltr"=>"dir", "rtl"=>"dir"},
"var"=>{"ltr"=>"dir", "rtl"=>"dir"},
"samp"=>{"ltr"=>"dir", "rtl"=>"dir"},
"div"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"textarea"=>
{"disabled"=>"disabled", "ltr"=>"dir", "readonly"=>"readonly", "rtl"=>"dir"},
"pre"=>{"ltr"=>"dir", "rtl"=>"dir"},
"head"=>{"ltr"=>"dir", "rtl"=>"dir"},
"span"=>{"ltr"=>"dir", "rtl"=>"dir"},
"br"=>{"all"=>"clear", "left"=>"clear", "none"=>"clear", "right"=>"clear"},
"script"=>{"defer"=>"defer"},
"noframes"=>{"ltr"=>"dir", "rtl"=>"dir"},
"style"=>{"ltr"=>"dir", "rtl"=>"dir"},
"meta"=>{"ltr"=>"dir", "rtl"=>"dir"},
"dt"=>{"ltr"=>"dir", "rtl"=>"dir"},
"option"=>
{"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir", "selected"=>"selected"},
"kbd"=>{"ltr"=>"dir", "rtl"=>"dir"},
"big"=>{"ltr"=>"dir", "rtl"=>"dir"},
"tfoot"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"sup"=>{"ltr"=>"dir", "rtl"=>"dir"},
"bdo"=>{"ltr"=>"dir", "rtl"=>"dir"},
"isindex"=>{"ltr"=>"dir", "rtl"=>"dir"},
"dfn"=>{"ltr"=>"dir", "rtl"=>"dir"},
"fieldset"=>{"ltr"=>"dir", "rtl"=>"dir"},
"em"=>{"ltr"=>"dir", "rtl"=>"dir"},
"font"=>{"ltr"=>"dir", "rtl"=>"dir"},
"tbody"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"noscript"=>{"ltr"=>"dir", "rtl"=>"dir"},
"li"=>{"ltr"=>"dir", "rtl"=>"dir"},
"col"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"small"=>{"ltr"=>"dir", "rtl"=>"dir"},
"dd"=>{"ltr"=>"dir", "rtl"=>"dir"},
"i"=>{"ltr"=>"dir", "rtl"=>"dir"},
"menu"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
"strong"=>{"ltr"=>"dir", "rtl"=>"dir"},
"img"=>
{"bottom"=>"align", "ismap"=>"ismap", "left"=>"align", "ltr"=>"dir",
"middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"},
"optgroup"=>{"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir"},
"map"=>{"ltr"=>"dir", "rtl"=>"dir"},
"address"=>{"ltr"=>"dir", "rtl"=>"dir"},
"h1"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"sub"=>{"ltr"=>"dir", "rtl"=>"dir"},
"param"=>{"data"=>"valuetype", "object"=>"valuetype", "ref"=>"valuetype"},
"input"=>
{"bottom"=>"align", "button"=>"type", "checkbox"=>"type",
"checked"=>"checked", "disabled"=>"disabled", "file"=>"type",
"hidden"=>"type", "image"=>"type", "ismap"=>"ismap", "left"=>"align",
"ltr"=>"dir", "middle"=>"align", "password"=>"type", "radio"=>"type",
"readonly"=>"readonly", "reset"=>"type", "right"=>"align", "rtl"=>"dir",
"submit"=>"type", "text"=>"type", "top"=>"align"},
"h2"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"abbr"=>{"ltr"=>"dir", "rtl"=>"dir"},
"h3"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"strike"=>{"ltr"=>"dir", "rtl"=>"dir"},
"body"=>{"ltr"=>"dir", "rtl"=>"dir"},
"ins"=>{"ltr"=>"dir", "rtl"=>"dir"},
"button"=>
{"button"=>"type", "disabled"=>"disabled", "ltr"=>"dir", "reset"=>"type",
"rtl"=>"dir", "submit"=>"type"},
"h4"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"select"=>
{"disabled"=>"disabled", "ltr"=>"dir", "multiple"=>"multiple", "rtl"=>"dir"},
"caption"=>
{"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align",
"rtl"=>"dir", "top"=>"align"},
"colgroup"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"tr"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"del"=>{"ltr"=>"dir", "rtl"=>"dir"},
"h5"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"iframe"=>
{"0"=>"frameborder", "1"=>"frameborder", "auto"=>"scrolling",
"bottom"=>"align", "left"=>"align", "middle"=>"align", "no"=>"scrolling",
"right"=>"align", "top"=>"align", "yes"=>"scrolling"}}
# :startdoc:
# The code above is auto-generated. Don't edit manually.
end
htree-0.8/htree/traverse.rb 0000644 0001750 0001750 00000035556 11747021106 015002 0 ustar jonas jonas require 'htree/doc'
require 'htree/elem'
require 'htree/loc'
require 'htree/extract_text'
require 'uri'
module HTree
module Traverse
  # Kind predicates. Each one reports whether the receiver carries the
  # traversal mixin (+Trav+ module) of the corresponding node kind.
  def doc?
    Doc::Trav === self
  end

  def elem?
    Elem::Trav === self
  end

  def text?
    Text::Trav === self
  end

  def xmldecl?
    XMLDecl::Trav === self
  end

  def doctype?
    DocType::Trav === self
  end

  def procins?
    ProcIns::Trav === self
  end

  def comment?
    Comment::Trav === self
  end

  def bogusetag?
    BogusETag::Trav === self
  end

  # +get_subnode+ follows _indexes_ one step at a time, starting from the
  # receiver, by calling +get_subnode_internal+ on each intermediate result.
  # With no indexes it returns the receiver itself.
  def get_subnode(*indexes)
    indexes.inject(self) {|node, index| node.get_subnode_internal(index) }
  end
end
module Container::Trav
# +each_child+ passes every child node, in order, to the given block.
# It always returns nil.
def each_child(&visitor) # :yields: child_node
  children.each(&visitor)
  nil
end
# +each_child_with_index+ passes every child node together with its
# position among the children to the given block.
# It always returns nil.
def each_child_with_index(&visitor) # :yields: child_node, index
  children.each_with_index(&visitor)
  nil
end
# +find_element+ returns the first element yielded by +traverse_element+
# for the given universal names.
# It returns nil if no element is found.
def find_element(*names)
  traverse_element(*names) do |found|
    return found
  end
  nil
end
# +traverse_element+ traverses elements in the tree.
# It yields elements in depth first order.
#
# If _names_ are empty, it yields all elements.
# If non-empty _names_ are given, it should be list of universal names.
#
# A nested element is yielded in depth first order as follows.
#
#   t = HTree('<a id=0><b><a id=1 /></b><c id=2 /></a>')
#   t.traverse_element("a", "c") {|e| p e}
#   # =>
#   {elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
#   {emptyelem <a id="1">}
#   {emptyelem <c id="2">}
#
# Universal names are specified as follows.
#
#   t = HTree(<<'End')
#   <html>
#   <meta name="robots" content="index,nofollow">
#   <meta name="author" content="Who am I?">
#   </html>
#   End
#   t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
#   # =>
#   {emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">}
#   {emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}
#
# It always returns nil.
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    traverse_all_element(&block)
  else
    # The names are turned into a hash so that each visited element needs
    # only one membership lookup.
    name_set = {}
    names.each {|n| name_set[n] = true }
    traverse_some_element(name_set, &block)
  end
  nil
end
# +each_hyperlink_attribute+ visits the XHTML elements listed below and,
# for each hyperlink-bearing attribute that is present on them, yields the
# element, the attribute name and the attribute value.
def each_hyperlink_attribute
  traverse_element(
    '{http://www.w3.org/1999/xhtml}a',
    '{http://www.w3.org/1999/xhtml}area',
    '{http://www.w3.org/1999/xhtml}link',
    '{http://www.w3.org/1999/xhtml}img',
    '{http://www.w3.org/1999/xhtml}object',
    '{http://www.w3.org/1999/xhtml}q',
    '{http://www.w3.org/1999/xhtml}blockquote',
    '{http://www.w3.org/1999/xhtml}ins',
    '{http://www.w3.org/1999/xhtml}del',
    '{http://www.w3.org/1999/xhtml}form',
    '{http://www.w3.org/1999/xhtml}input',
    '{http://www.w3.org/1999/xhtml}head',
    '{http://www.w3.org/1999/xhtml}base',
    '{http://www.w3.org/1999/xhtml}script') do |elem|
    # Pick the attribute names that may hold a URI on this kind of element.
    attrs =
      case elem.name
      when %r{\{http://www.w3.org/1999/xhtml\}(?:base|a|area|link)\z}i
        ['href']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:img)\z}i
        ['src', 'longdesc', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:object)\z}i
        ['classid', 'codebase', 'data', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:q|blockquote|ins|del)\z}i
        ['cite']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:form)\z}i
        ['action']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:input)\z}i
        ['src', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:head)\z}i
        ['profile']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:script)\z}i
        ['src', 'for']
      end
    attrs.each do |attr|
      hyperlink = elem.get_attribute(attr)
      yield elem, attr, hyperlink if hyperlink
    end
  end
end
private :each_hyperlink_attribute
# +each_hyperlink_uri+ traverses hyperlinks such as HTML href attribute
# of A element.
#
# It yields the hyperlink value and a URI object for each hyperlink.
#
# The URI objects are resolved against a base URI, which is taken from the
# HTML BASE element if there is one, otherwise from the argument _base_uri_
# (a String argument is parsed into a URI first).
# Without any base URI each hyperlink is parsed on its own.
# +each_hyperlink_uri+ doesn't yield the href of the BASE element.
def each_hyperlink_uri(base_uri=nil) # :yields: hyperlink, uri
  base_uri = URI.parse(base_uri) if String === base_uri
  pending = []
  # First pass: collect the links and pick up a BASE element, so that the
  # base applies to every link regardless of document order.
  each_hyperlink_attribute do |elem, _attr, value|
    if %r{\{http://www.w3.org/1999/xhtml\}(?:base)\z}i =~ elem.name
      base_uri = URI.parse(value.to_s)
    else
      pending << value
    end
  end
  pending.each do |value|
    if base_uri
      yield value, base_uri + value.to_s
    else
      yield value, URI.parse(value.to_s)
    end
  end
end
# +each_hyperlink+ traverses hyperlinks such as HTML href attribute
# of A element.
#
# It yields each hyperlink attribute value (HTree::Text or HTree::Loc).
#
# Note that +each_hyperlink+ also yields the href attribute of a BASE
# element.
def each_hyperlink # :yields: text
  each_hyperlink_attribute do |_elem, _attr, value|
    yield value
  end
end
# +each_uri+ traverses hyperlinks such as HTML href attribute
# of A element.
#
# It yields only the URI of each hyperlink, as computed by
# +each_hyperlink_uri+ for the same _base_uri_ argument.
def each_uri(base_uri=nil) # :yields: URI
  each_hyperlink_uri(base_uri) do |_hyperlink, uri|
    yield uri
  end
end
end
# :stopdoc:
module Doc::Trav
  # Forwards the walk to every child; the receiver itself is not yielded.
  def traverse_all_element(&block)
    children.each do |child|
      child.traverse_all_element(&block)
    end
  end
end
module Elem::Trav
  # Yields the receiver first, then continues the walk through every child.
  def traverse_all_element(&block)
    yield self
    children.each do |child|
      child.traverse_all_element(&block)
    end
  end
end
module Leaf::Trav
  # Does nothing and yields nothing; returns nil.
  def traverse_all_element
    nil
  end
end
module Doc::Trav
  # Forwards the filtered walk to every child; the receiver itself is not
  # yielded. _name_set_ is passed along unchanged.
  def traverse_some_element(name_set, &block)
    children.each do |child|
      child.traverse_some_element(name_set, &block)
    end
  end
end
module Elem::Trav
  # Yields the receiver if its universal name is a member of _name_set_,
  # then continues the filtered walk through every child.
  def traverse_some_element(name_set, &block)
    yield self if name_set.include?(name)
    children.each do |child|
      child.traverse_some_element(name_set, &block)
    end
  end
end
module Leaf::Trav
  # Does nothing and yields nothing, whatever _name_set_ holds; returns nil.
  def traverse_some_element(name_set)
    nil
  end
end
# :startdoc:
module Traverse
  # +traverse_text+ traverses texts in the tree by delegating to
  # +traverse_text_internal+ with the given block.
  # It always returns nil.
  def traverse_text(&visitor) # :yields: text
    traverse_text_internal(&visitor)
    nil
  end
end
# :stopdoc:
module Container::Trav
  # Forwards the text walk to every child of the receiver.
  def traverse_text_internal(&block)
    each_child do |child|
      child.traverse_text_internal(&block)
    end
  end
end
module Leaf::Trav
  # Default for leaves: yields nothing and returns nil.
  def traverse_text_internal
    nil
  end
end
module Text::Trav
  # Yields the receiver itself to the block.
  def traverse_text_internal
    yield(self)
  end
end
# :startdoc:
module Container::Trav
# +filter+ rebuilds the tree without some components.
#
#   node.filter {|descendant_node| predicate } -> node
#   loc.filter {|descendant_loc| predicate } -> node
#
# +filter+ yields each node except the top node.
# If the given block returns false, the corresponding node is dropped.
# If the given block returns true, the corresponding node is retained and,
# when it is an element, its inner nodes are examined in the same way.
#
# +filter+ returns a node.
# It doesn't return a location object even if self is a location object.
#
def filter(&block)
  replacements = {}
  each_child_with_index do |node, position|
    replacements[position] =
      if !yield(node)
        nil                  # a nil replacement removes the child
      elsif node.elem?
        node.filter(&block)  # retained element: filter its own children
      else
        node
      end
  end
  to_node.subst_subnode(replacements)
end
end
module Doc::Trav
# +title+ searches a title element and returns its text.
# It returns nil if not found.
#
# +title+ searches the following information.
#
# - <title>...</title> in HTML
# - <title>...</title> in RSS
# - <title>...</title> in Atom
def title
  title_names = [
    'title',
    '{http://www.w3.org/1999/xhtml}title',
    '{http://purl.org/rss/1.0/}title',
    '{http://my.netscape.com/rdf/simple/0.9/}title',
    '{http://www.w3.org/2005/Atom}title',
    '{http://purl.org/atom/ns#}title'
  ]
  found = find_element(*title_names)
  found && found.extract_text
end
# +author+ searches author and return it as a text.
# It returns nil if not found.
#
# +author+ searchs following information.
#
# - in HTML
# - in HTML
# - author-name in RSS
# - author-name in RSS
# - author-name in Atom
def author
traverse_element('meta',
'{http://www.w3.org/1999/xhtml}meta') {|e|
begin
next unless e.fetch_attr('name').downcase == 'author'
author = e.fetch_attribute('content').strip
return author if !author.empty?
rescue IndexError
end
}
traverse_element('link',
'{http://www.w3.org/1999/xhtml}link') {|e|
begin
next unless e.fetch_attr('rev').downcase == 'made'
author = e.fetch_attribute('title').strip
return author if !author.empty?
rescue IndexError
end
}
if channel = find_element('{http://purl.org/rss/1.0/}channel')
channel.traverse_element('{http://purl.org/dc/elements/1.1/}creator') {|e|
begin
author = e.extract_text.strip
return author if !author.empty?
rescue IndexError
end
}
channel.traverse_element('{http://purl.org/dc/elements/1.1/}publisher') {|e|
begin
author = e.extract_text.strip
return author if !author.empty?
rescue IndexError
end
}
end
['http://www.w3.org/2005/Atom', 'http://purl.org/atom/ns#'].each {|xmlns|
each_child {|top|
next unless top.elem?
if top.name == "{#{xmlns}}feed"
if feed_author = find_element("{#{xmlns}}author")
feed_author.traverse_element("{#{xmlns}}name") {|e|
begin
author = e.extract_text.strip
return author if !author.empty?
rescue IndexError
end
}
end
end
}
}
nil
end
end
module Doc::Trav
# +root+ returns the single element found among the top level children.
# If there is no element on top level, it raises HTree::Error.
# If there are two or more elements on top level, it raises HTree::Error.
def root
  top_elements = children.select {|c| c.elem? }
  raise HTree::Error, "no element" if top_elements.empty?
  raise HTree::Error, "multiple top elements" if 1 < top_elements.length
  top_elements[0]
end
# +has_xmldecl?+ returns true if one of the top level children is an XML
# declaration, and false otherwise.
def has_xmldecl?
  children.any? {|child| child.xmldecl? }
end
end
module Elem::Trav
# +name+ returns the universal name of the element as a string.
#
#   p HTree('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>').root.name
#   # =>
#   "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF"
#
def name
  element_name.universal_name
end
# +qualified_name+ returns the qualified name of the element as a string.
#
#   p HTree('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>').root.qualified_name
#   # =>
#   "rdf:RDF"
def qualified_name
  element_name.qualified_name
end
# +attributes+ returns attributes as a hash.
# The hash keys are HTree::Name objects.
# The hash values are HTree::Text or HTree::Location objects.
#
#   p HTree('<a name="xx" href="uu">').root.attributes
#   # =>
#   {href=>{text "uu"}, name=>{text "xx"}}
#
#   p HTree('<a name="xx" href="uu">').make_loc.root.attributes
#   # =>
#   {href=>#<HTree::Location: ...>, name=>#<HTree::Location: ...>}
#
def attributes
  result = {}
  each_attribute {|name, text|
    result[name] = text
  }
  result
end
# +each_attr+ yields, for every attribute, its universal name and its value
# converted with +to_s+.
def each_attr
  each_attribute do |name, text|
    yield name.universal_name, text.to_s
  end
end
# call-seq:
#   elem.fetch_attribute(name) -> text or raise IndexError
#   elem.fetch_attribute(name, default) -> text or default
#   elem.fetch_attribute(name) {|uname| default } -> text or default
#
# +fetch_attribute+ returns an attribute value as a text.
# _name_ may be a universal name string or an object responding to
# +universal_name+.
#
# It raises ArgumentError if more than one default is given, or if both a
# default and a block are given.
#
# elem may be an instance of HTree::Elem or a location points to it.
def fetch_attribute(uname, *rest, &block)
  raise ArgumentError, "wrong number of arguments (#{1+rest.length} for 2)" if 1 < rest.length
  raise ArgumentError, "block supersedes default value argument" if !rest.empty? && block_given?
  uname = uname.universal_name if uname.respond_to? :universal_name
  update_attribute_hash.fetch(uname) do
    # Attribute missing: block first, then the default, else an error.
    if block_given?
      yield(uname)
    elsif rest.empty?
      raise IndexError, "attribute not found: #{uname.inspect}"
    else
      rest[0]
    end
  end
end
# call-seq:
#   elem.fetch_attr(name) -> string or raise IndexError
#   elem.fetch_attr(name, default) -> string or default
#   elem.fetch_attr(name) {|uname| default } -> string or default
#
# +fetch_attr+ returns an attribute value as a string.
# _name_ may be a universal name string or an object responding to
# +universal_name+.
# A default (argument or block result) is returned as is, without being
# converted to a string.
#
# It raises ArgumentError if more than one default is given, or if both a
# default and a block are given.
#
# elem may be an instance of HTree::Elem or a location points to it.
def fetch_attr(uname, *rest, &block)
  raise ArgumentError, "wrong number of arguments (#{1+rest.length} for 2)" if 1 < rest.length
  raise ArgumentError, "block supersedes default value argument" if !rest.empty? && block_given?
  uname = uname.universal_name if uname.respond_to? :universal_name
  known = update_attribute_hash
  return known[uname].to_s if known.key?(uname)
  return yield(uname) if block_given?
  raise IndexError, "attribute not found: #{uname.inspect}" if rest.empty?
  rest[0]
end
# +get_attribute+ returns the attribute value stored under _uname_, or nil
# if there is no such attribute.
# _uname_ may be a universal name string or an object responding to
# +universal_name+.
def get_attribute(uname)
  key = uname.respond_to?(:universal_name) ? uname.universal_name : uname
  update_attribute_hash[key]
end
# +get_attr+ returns the attribute value stored under _uname_ converted
# with +to_s+, or nil if there is no such attribute.
# _uname_ may be a universal name string or an object responding to
# +universal_name+; the latter is normalised first, as in +get_attribute+,
# +fetch_attr+ and +fetch_attribute+.
def get_attr(uname)
  uname = uname.universal_name if uname.respond_to? :universal_name
  text = update_attribute_hash[uname]
  text ? text.to_s : nil
end
end
end
htree-0.8/htree/leaf.rb 0000644 0001750 0001750 00000005257 11747021106 014051 0 ustar jonas jonas require 'htree/modules'
require 'htree/raw_string'
module HTree
class XMLDecl
  attr_reader :version, :encoding, :standalone

  # Builds an XML declaration node.
  #
  # _version_ must consist of letters, digits, "_", ".", ":" and "-".
  # _encoding_, if given, must start with a letter followed by letters,
  # digits, ".", "_" or "-".
  # _standalone_ must be nil, true or false.
  # HTree::Error is raised for any other value.
  def initialize(version, encoding=nil, standalone=nil)
    init_raw_string
    unless /\A[a-zA-Z0-9_.:-]+\z/ =~ version
      raise HTree::Error, "invalid version in XML declaration: #{version.inspect}"
    end
    if encoding
      unless /\A[A-Za-z][A-Za-z0-9._-]*\z/ =~ encoding
        raise HTree::Error, "invalid encoding in XML declaration: #{encoding.inspect}"
      end
    end
    if standalone != nil && standalone != true && standalone != false
      raise HTree::Error, "invalid standalone document declaration in XML declaration: #{standalone.inspect}"
    end
    @version = version
    @encoding = encoding
    @standalone = standalone
  end
end
class DocType
  attr_reader :root_element_name, :public_identifier, :system_identifier

  # Builds a document type declaration node.
  #
  # _public_identifier_, if given, may only contain the characters accepted
  # by the pattern below.
  # _system_identifier_, if given, must not contain both a double quote and
  # a single quote.
  # HTree::Error is raised otherwise.
  def initialize(root_element_name, public_identifier=nil, system_identifier=nil)
    init_raw_string
    if public_identifier
      unless /\A[ \x0d\x0aa-zA-Z0-9\-'()+,.\/:=?;!*\#@$_%]*\z/ =~ public_identifier
        raise HTree::Error, "invalid public identifier in document type declaration: #{public_identifier.inspect}"
      end
    end
    if system_identifier
      if /"/ =~ system_identifier && /'/ =~ system_identifier
        raise HTree::Error, "invalid system identifier in document type declaration: #{system_identifier.inspect}"
      end
    end
    @root_element_name = root_element_name
    @public_identifier = public_identifier
    @system_identifier = system_identifier
  end
end
class ProcIns
  # :stopdoc:
  class << self
    alias new! new
  end
  # :startdoc:

  attr_reader :target, :content

  # Creates a processing instruction. Every "?>" in _content_ is rewritten
  # to "? >" first, so that the strict constructor (+new!+) accepts it.
  # A nil _content_ is passed through unchanged.
  def self.new(target, content)
    content = content.gsub(/\?>/, '? >') if content
    new!(target, content)
  end

  # Strict constructor: raises HTree::Error if _content_ contains "?>".
  def initialize(target, content) # :notnew:
    init_raw_string
    if content
      raise HTree::Error, "invalid processing instruction content: #{content.inspect}" if /\?>/ =~ content
    end
    @target = target
    @content = content
  end
end
class Comment
  # :stopdoc:
  class << self
    alias new! new
  end
  # :startdoc:

  attr_reader :content

  # Creates a comment. Runs of hyphens in _content_ are separated by spaces
  # and a trailing hyphen gets a space appended, so that the strict
  # constructor (+new!+) accepts the result.
  def self.new(content)
    spaced = content.gsub(/-(-+)/) {|run| '-' + ' -' * (run.length - 1) }
    new!(spaced.sub(/-\z/, '- '))
  end

  # Strict constructor: raises HTree::Error if _content_ contains "--" or
  # ends with "-".
  def initialize(content) # :notnew:
    init_raw_string
    if /--/ =~ content || /-\z/ =~ content
      raise HTree::Error, "invalid comment content: #{content.inspect}"
    end
    @content = content
  end
end
class BogusETag
  # Stores an ETag built from _qualified_name_ in @etag, after resetting
  # the raw string via +init_raw_string+.
  def initialize(qualified_name)
    init_raw_string
    end_tag = ETag.new(qualified_name)
    @etag = end_tag
  end
end
end
htree-0.8/htree/raw_string.rb 0000644 0001750 0001750 00000004772 11747021106 015322 0 ustar jonas jonas require 'htree/modules'
require 'htree/fstr'
module HTree
module Node
  # raw_string returns a source string recorded by parsing.
  # It returns +nil+ if the node is constructed not via parsing.
  #
  # The string is accumulated by +raw_string_internal+, which aborts the
  # whole computation by throwing :raw_string_tag as soon as a part without
  # a recorded source string is reached.
  def raw_string
    catch(:raw_string_tag) {
      buffer = ''
      raw_string_internal(buffer)
      # Return the accumulated buffer, not the value of
      # raw_string_internal: container implementations end with an
      # iteration or a conditional, whose value is not the string.
      return buffer
    }
    nil
  end
end
# :stopdoc:
class Doc
  # Appends the raw string of every child, in order, to _result_.
  def raw_string_internal(result)
    @children.each {|child| child.raw_string_internal(result) }
  end
end
class Elem
  # Appends to _result_ the raw string of the start tag, of every child in
  # order, and of the end tag when there is one.
  def raw_string_internal(result)
    @stag.raw_string_internal(result)
    @children.each do |child|
      child.raw_string_internal(result)
    end
    @etag.raw_string_internal(result) if @etag
  end
end
module Tag
  # Marks the tag as having no recorded source string.
  def init_raw_string
    @raw_string = nil
  end

  # Records _arg_ as the source string, passed through HTree.frozen_string.
  def raw_string=(arg)
    @raw_string = HTree.frozen_string(arg)
  end

  # Appends the recorded source string to _result_.
  # Throws :raw_string_tag if none is recorded.
  def raw_string_internal(result)
    throw :raw_string_tag unless @raw_string
    result << @raw_string
  end
end
module Leaf
  # Marks the leaf as having no recorded source string.
  def init_raw_string
    @raw_string = nil
  end

  # Records _arg_ as the source string, passed through HTree.frozen_string.
  def raw_string=(arg)
    @raw_string = HTree.frozen_string(arg)
  end

  # Appends the recorded source string to _result_.
  # Throws :raw_string_tag if none is recorded.
  def raw_string_internal(result)
    throw :raw_string_tag unless @raw_string
    result << @raw_string
  end
end
class Text
  # When _arg_ equals @rcdata, the existing @rcdata object is reused as the
  # raw string; otherwise the inherited setter is used.
  def raw_string=(arg)
    return super unless arg == @rcdata
    @raw_string = @rcdata
  end
end
# :startdoc:
module Node
# Default implementation: always raises NotImplementedError.
# The classes that reopen below in this file (Doc, Elem, Text, STag, ETag,
# XMLDecl, DocType, ProcIns, Comment) each define their own version.
def eliminate_raw_string
raise NotImplementedError
end
end
# :stopdoc:
class Doc
  # Returns a new Doc built from the result of +eliminate_raw_string+ on
  # every child.
  def eliminate_raw_string
    stripped = @children.map {|child| child.eliminate_raw_string }
    Doc.new(stripped)
  end
end
class Elem
  # Rebuilds the element through Elem.new! from the stripped start tag, the
  # stripped children (nil when @empty is set) and the stripped end tag
  # (when there is one).
  def eliminate_raw_string
    stag = @stag.eliminate_raw_string
    children = @empty ? nil : @children.map {|child| child.eliminate_raw_string }
    etag = @etag && @etag.eliminate_raw_string
    Elem.new!(stag, children, etag)
  end
end
class Text
  # Returns a text built by Text.new_internal from @rcdata alone.
  def eliminate_raw_string
    rcdata = @rcdata
    Text.new_internal(rcdata)
  end
end
class STag
  # Returns a new STag built from the qualified name, the attributes and
  # the inherited context of the receiver.
  def eliminate_raw_string
    name, attrs, context = @qualified_name, @attributes, @inherited_context
    STag.new(name, attrs, context)
  end
end
class ETag
  # Returns a new object of the receiver's own class built from the
  # qualified name.
  def eliminate_raw_string
    same_class = self.class
    same_class.new(@qualified_name)
  end
end
class XMLDecl
  # Returns a new XMLDecl built from the version, encoding and standalone
  # values of the receiver.
  def eliminate_raw_string
    version, encoding, standalone = @version, @encoding, @standalone
    XMLDecl.new(version, encoding, standalone)
  end
end
class DocType
  # Returns a new DocType built from the root element name and the public
  # and system identifiers of the receiver.
  def eliminate_raw_string
    root, pubid, sysid = @root_element_name, @public_identifier, @system_identifier
    DocType.new(root, pubid, sysid)
  end
end
class ProcIns
  # Returns a new ProcIns built from the target and content of the receiver.
  def eliminate_raw_string
    target, content = @target, @content
    ProcIns.new(target, content)
  end
end
class Comment
  # Returns a new Comment built from the content of the receiver.
  def eliminate_raw_string
    content = @content
    Comment.new(content)
  end
end
# :startdoc:
end
htree-0.8/htree/scan.rb 0000644 0001750 0001750 00000015225 11747021106 014062 0 ustar jonas jonas require 'htree/htmlinfo'
require 'htree/regexp-util'
require 'htree/fstr'
module HTree
# :stopdoc:
# Regular expressions used by the scanner.
#
# A constant ending in +_C+ keeps its capture groups; the constant without
# the suffix is derived from it with +disable_capture+ so that it can be
# embedded in larger patterns.
# NOTE(review): +disable_capture+ comes from htree/regexp-util; presumably
# it turns capturing groups into non-capturing ones -- confirm there.
#
# NOTE(review): Comment_C, CDATA_C, DocType_C and the leading "</" of
# EndTag_C were reconstructed from the XML/HTML syntax and from the way the
# scanner uses their captures (root element name in $1, public identifier
# in $2 or $3 for DocType_C). Confirm against the upstream htree sources.
module Pat
  NameChar = /[-A-Za-z0-9._:]/
  Name = /[A-Za-z_:]#{NameChar}*/
  Nmtoken = /#{NameChar}+/
  Comment_C = /<!--(.*?)-->/m
  Comment = Comment_C.disable_capture
  CDATA_C = /<!\[CDATA\[(.*?)\]\]>/m
  CDATA = CDATA_C.disable_capture
  QuotedAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)')/
  QuotedAttr = QuotedAttr_C.disable_capture
  ValidAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)'|(#{NameChar}*))|(#{Nmtoken})/
  ValidAttr = ValidAttr_C.disable_capture
  InvalidAttr1_C = /(#{Name})\s*=\s*(?:'([^'<>]*)'|"([^"<>]*)"|([^\s<>"']*(?![^\s<>"'])))|(#{Nmtoken})/
  InvalidAttr1 = InvalidAttr1_C.disable_capture
  InvalidAttr1End_C = /(#{Name})(?:\s*=\s*(?:'([^'<>]*)|"([^"<>]*)))/
  InvalidAttr1End = InvalidAttr1End_C.disable_capture
  QuotedStartTag_C = /<(#{Name})((?:\s+#{QuotedAttr})*)\s*>/
  QuotedStartTag = QuotedStartTag_C.disable_capture
  ValidStartTag_C = /<(#{Name})((?:\s+#{ValidAttr})*)\s*>/
  ValidStartTag = ValidStartTag_C.disable_capture
  InvalidStartTag_C = /<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*>/
  InvalidStartTag = InvalidStartTag_C.disable_capture
  StartTag = /#{QuotedStartTag}|#{ValidStartTag}|#{InvalidStartTag}/
  QuotedEmptyTag_C = %r{<(#{Name})((?:\s+#{QuotedAttr})*)\s*/>}
  QuotedEmptyTag = QuotedEmptyTag_C.disable_capture
  ValidEmptyTag_C = %r{<(#{Name})((?:\s+#{ValidAttr})*)\s*/>}
  ValidEmptyTag = ValidEmptyTag_C.disable_capture
  InvalidEmptyTag_C = %r{<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*/>}
  InvalidEmptyTag = InvalidEmptyTag_C.disable_capture
  EmptyTag = /#{QuotedEmptyTag}|#{ValidEmptyTag}|#{InvalidEmptyTag}/
  EndTag_C = %r{</(#{Name})\s*>}
  EndTag = EndTag_C.disable_capture
  XmlVersionNum = /[a-zA-Z0-9_.:-]+/
  XmlVersionInfo_C = /\s+version\s*=\s*(?:'(#{XmlVersionNum})'|"(#{XmlVersionNum})")/
  XmlVersionInfo = XmlVersionInfo_C.disable_capture
  XmlEncName = /[A-Za-z][A-Za-z0-9._-]*/
  XmlEncodingDecl_C = /\s+encoding\s*=\s*(?:"(#{XmlEncName})"|'(#{XmlEncName})')/
  XmlEncodingDecl = XmlEncodingDecl_C.disable_capture
  XmlSDDecl_C = /\s+standalone\s*=\s*(?:'(yes|no)'|"(yes|no)")/
  XmlSDDecl = XmlSDDecl_C.disable_capture
  XmlDecl_C = /<\?xml#{XmlVersionInfo_C}#{XmlEncodingDecl_C}?#{XmlSDDecl_C}?\s*\?>/
  XmlDecl = /<\?xml#{XmlVersionInfo}#{XmlEncodingDecl}?#{XmlSDDecl}?\s*\?>/
  # xxx: internal DTD subset is not recognized: '[' (markupdecl | DeclSep)* ']' S?)?
  SystemLiteral_C = /"([^"]*)"|'([^']*)'/
  PubidLiteral_C = %r{"([\sa-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*)"|'([\sa-zA-Z0-9\-()+,./:=?;!*\#@$_%]*)'}
  ExternalID_C = /(?:SYSTEM|PUBLIC\s+#{PubidLiteral_C})(?:\s+#{SystemLiteral_C})?/
  # An internal subset, if present, is skipped as an opaque "[...]" run.
  DocType_C = /<!DOCTYPE\s+(#{Name})(?:\s+#{ExternalID_C})?\s*(?:\[.*?\])?\s*>/m
  DocType = DocType_C.disable_capture
  XmlProcIns_C = /<\?(#{Name})(?:\s+(.*?))?\?>/m
  XmlProcIns = XmlProcIns_C.disable_capture
  #ProcIns = /<\?([^>]*)>/m
end
# Tokenizes +input+ and yields one two-element array per token:
# [token_type, frozen_string].
#
# Token types yielded by the code below: :xmldecl, :doctype, :procins, :stag,
# :etag, :emptytag, :comment, :text_pcdata, :text_cdata_section and
# :text_cdata_content.
#
# +is_xml+ is the initial value of the XML flag. The flag is switched on when
# an XML declaration or an XHTML public identifier is seen, or when the first
# start tag is not one of a fixed list of HTML element names.
#
# Returns the pair [is_xml, is_html] as determined during the scan.
def HTree.scan(input, is_xml=false)
is_html = false
# Name of the element whose raw (CDATA) content is being collected, or nil
# when scanning ordinary markup; cdata_content_string accumulates that text.
cdata_content = nil
cdata_content_string = nil
# Text seen since the last markup token; flushed as :text_pcdata.
pcdata = ''
first_element = true
# Capture-group numbers inside +pat+ below: group 1 is the text preceding the
# token, group 2 the token itself, groups 3..13 tell which alternative matched.
# NOTE(review): this numbering only holds if the interpolated Pat:: patterns
# contain no capture groups of their own -- confirm for Pat::Comment and
# Pat::CDATA, which are defined outside this view.
index_otherstring = 1
index_str = 2
index_xmldecl = 3
index_doctype = 4
index_xmlprocins = 5
index_quotedstarttag = 6
index_quotedemptytag = 7
index_starttag = 8
index_endtag = 9
index_emptytag = 10
index_comment = 11
index_cdata = 12
index_end = 13
pat = /\G(.*?)((#{Pat::XmlDecl})
|(#{Pat::DocType})
|(#{Pat::XmlProcIns})
|(#{Pat::QuotedStartTag})
|(#{Pat::QuotedEmptyTag})
|(#{Pat::StartTag})
|(#{Pat::EndTag})
|(#{Pat::EmptyTag})
|(#{Pat::Comment})
|(#{Pat::CDATA})
|(\z))
/oxm
input.scan(pat) {
match = $~
if cdata_content
# Inside a CDATA-content element: everything is literal text until the
# end tag whose name equals cdata_content, or until end of input.
cdata_content_string << match[index_otherstring]
str = match[index_str]
if match[index_endtag] && str[Pat::Name] == cdata_content
unless cdata_content_string.empty?
yield [:text_cdata_content, HTree.frozen_string(cdata_content_string)]
end
yield [:etag, HTree.frozen_string(str)]
cdata_content = nil
cdata_content_string = nil
elsif match[index_end]
cdata_content_string << str
unless cdata_content_string.empty?
yield [:text_cdata_content, HTree.frozen_string(cdata_content_string)]
end
cdata_content = nil
cdata_content_string = nil
else
# Any other token is not markup here; keep it as part of the raw text.
cdata_content_string << str
end
else
pcdata << match[index_otherstring]
str = match[index_str]
# Emit the text that preceded this token before the token itself.
if !pcdata.empty?
yield [:text_pcdata, HTree.frozen_string(pcdata)]
pcdata = ''
end
if match[index_xmldecl]
yield [:xmldecl, HTree.frozen_string(str)]
is_xml = true
elsif match[index_doctype]
# Re-match with the capturing variant to read the root name and identifiers.
Pat::DocType_C =~ str
root_element_name = $1
public_identifier = $2 || $3
#system_identifier = $4 || $5
is_html = true if /\Ahtml\z/i =~ root_element_name
is_xml = true if public_identifier && %r{\A-//W3C//DTD XHTML } =~ public_identifier
yield [:doctype, HTree.frozen_string(str)]
elsif match[index_xmlprocins]
yield [:procins, HTree.frozen_string(str)]
elsif match[index_starttag] || match[index_quotedstarttag]
yield stag = [:stag, HTree.frozen_string(str)]
tagname = str[Pat::Name]
# The first element decides the document flavour when nothing earlier did.
if first_element
if /\A(?:html|head|title|isindex|base|script|style|meta|link|object)\z/i =~ tagname
is_html = true
else
is_xml = true
end
first_element = false
end
# In non-XML mode, elements declared as CDATA in ElementContent switch the
# scanner into raw-text collection until their end tag.
if !is_xml && ElementContent[tagname] == :CDATA
cdata_content = tagname
cdata_content_string = ''
end
elsif match[index_endtag]
yield [:etag, HTree.frozen_string(str)]
elsif match[index_emptytag] || match[index_quotedemptytag]
yield [:emptytag, HTree.frozen_string(str)]
first_element = false
#is_xml = true
elsif match[index_comment]
yield [:comment, HTree.frozen_string(str)]
elsif match[index_cdata]
yield [:text_cdata_section, HTree.frozen_string(str)]
elsif match[index_end]
# pass
else
# Unreachable unless +pat+ and the index_* table disagree.
raise Exception, "unknown match [bug]"
end
end
}
return is_xml, is_html
end
# :startdoc:
end
htree-0.8/htree/output.rb 0000644 0001750 0001750 00000012017 11747021106 014472 0 ustar jonas jonas require 'htree/encoder'
require 'htree/doc'
require 'htree/elem'
require 'htree/leaf'
require 'htree/text'
module HTree
# :stopdoc:
class Text
  # Character references written in place of markup-significant characters
  # when text is serialized.
  ChRef = {
    '>' => '&gt;',
    '<' => '&lt;',
    '"' => '&quot;',
  }

  # Writes the text as element content, replacing "<" and ">" by character
  # references. "&" is not touched here; @rcdata is presumably already
  # reference-encoded -- confirm against Text.new.
  def output(out, context=nil)
    out.output_text @rcdata.gsub(/[<>]/) {|s| ChRef[s] }
  end

  # Returns the text in the form used inside a double-quoted attribute value:
  # "<", ">" and the double quote are replaced by character references.
  def to_attvalue_content
    @rcdata.gsub(/[<>"]/) {|s| ChRef[s] }
  end

  # Writes the text as a complete double-quoted attribute value.
  def output_attvalue(out, context)
    out.output_string '"'
    out.output_text to_attvalue_content
    out.output_string '"'
  end

  # Writes the text verbatim as CDATA content (e.g. of script/style).
  #
  # Raises ArgumentError when the text contains "</", because that sequence
  # would terminate the enclosing element early and cannot be escaped there.
  def output_cdata(out)
    str = self.to_s
    if %r{</} =~ str
      raise ArgumentError, "CDATA cannot contain '</': #{str.inspect}"
    end
    out.output_string(str)
  end
end
class Name
  # Writes the name as it appears in markup: "xmlns" / "xmlns:local" for a
  # namespace declaration, otherwise the qualified name.
  def output(out, context)
    # xxx: validate namespace prefix
    rendered =
      if !xmlns?
        qualified_name
      elsif @local_name
        "xmlns:#{@local_name}"
      else
        "xmlns"
      end
    out.output_string rendered
  end

  # Writes a whole attribute: the name, "=", then +text+ as a quoted value.
  def output_attribute(text, out, context)
    output(out, context)
    out.output_string '='
    text.output_attvalue(out, context)
  end
end
class Doc
  # Writes every child in order. Children that respond to
  # output_prolog_xmldecl are written through that method instead of
  # +output+, and only the first such child is written at all.
  def output(out, context)
    xmldecl_written = false
    @children.each do |child|
      unless child.respond_to? :output_prolog_xmldecl
        child.output(out, context)
        next
      end
      child.output_prolog_xmldecl(out, context) unless xmldecl_written # xxx: encoding?
      xmldecl_written = true
    end
  end
end
class Elem
  # Writes the element.
  #
  # XHTML script/style elements always get a start tag, their children
  # written through out.output_cdata_content, and an end tag. Any other
  # element is written as an empty-element tag when @empty is set, otherwise
  # as start tag, children, end tag.
  def output(out, context)
    universal = @stag.element_name.universal_name
    raw_content = %r{\A\{http://www.w3.org/1999/xhtml\}(?:script|style)\z} =~ universal
    return @stag.output_emptytag(out, context) if !raw_content && @empty

    inner_context = @stag.output_stag(out, context)
    if raw_content
      out.output_cdata_content(@children, inner_context)
    else
      @children.each {|child| child.output(out, inner_context) }
    end
    @stag.output_etag(out, context)
  end
end
class STag
  # Writes every attribute except namespace declarations, each preceded by a
  # space, then lets the tag's own context write the namespace declarations.
  # Returns whatever @context.output_namespaces returns, which the callers
  # below hand back as the context for the element's children.
  def output_attributes(out, context)
    @attributes.each {|aname, text|
      next if aname.xmlns?
      out.output_string ' '
      aname.output_attribute(text, out, context)
    }
    @context.output_namespaces(out, context)
  end

  # Writes an empty-element tag. The closing slash is delegated to
  # out.output_slash_if_xml. Returns the children context.
  def output_emptytag(out, context)
    out.output_string '<'
    @name.output(out, context)
    children_context = output_attributes(out, context)
    out.output_string "\n"
    out.output_slash_if_xml
    out.output_string ">"
    children_context
  end

  # Writes a start tag and returns the children context.
  def output_stag(out, context)
    out.output_string '<'
    @name.output(out, context)
    children_context = output_attributes(out, context)
    out.output_string "\n>"
    children_context
  end

  # Writes the matching end tag. The opener must be "</"; an empty opener
  # would emit "name\n>" as plain text instead of closing the element.
  def output_etag(out, context)
    out.output_string '</'
    @name.output(out, context)
    out.output_string "\n>"
  end
end
class Context
  # Writes the namespace declarations this context adds on top of
  # +outer_context+.
  #
  # A prefix bound to a different URI in the outer context is written as an
  # xmlns attribute. A prefix the outer context does not know (its
  # namespace_uri returns nil) is collected and passed to out.output_xmlns.
  # Returns outer_context.subst_namespaces(@namespaces).
  def output_namespaces(out, outer_context)
    undeclared = {}
    @namespaces.each do |prefix, uri|
      inherited = outer_context.namespace_uri(prefix)
      if inherited == nil
        undeclared[prefix] = uri
        next
      end
      if inherited != uri
        out.output_string(prefix ? " xmlns:#{prefix}=" : " xmlns=")
        Text.new(uri).output_attvalue(out, outer_context)
      end
    end
    out.output_xmlns(undeclared) unless undeclared.empty?
    outer_context.subst_namespaces(@namespaces)
  end
end
class BogusETag
  # A bogus end tag has no serialized form: nothing is written and nil is
  # returned.
  def output(out, context)
    nil
  end
end
class XMLDecl
  # Writes nothing: Doc#output emits the declaration through
  # output_prolog_xmldecl instead of this method.
  def output(out, context)
  end

  # Writes the XML declaration: version, then the encoding and standalone
  # pseudo-attributes when they are set.
  #
  # NOTE(review): the previous body wrote an empty string (the literal was
  # lost). The text and the instance variable names @version, @encoding and
  # @standalone are reconstructed -- confirm them against XMLDecl#initialize.
  def output_prolog_xmldecl(out, context)
    out.output_string "<?xml version=\"#{@version}\""
    if @encoding
      out.output_string " encoding=\"#{@encoding}\""
    end
    if @standalone != nil
      out.output_string " standalone=\"#{@standalone ? 'yes' : 'no'}\""
    end
    out.output_string "?>"
  end
end
class DocType
  # Writes the document type declaration.
  #
  # NOTE(review): the previous body wrote an empty string and never used
  # generate_content (the literal was lost). The text and the instance
  # variable name @root_element_name are reconstructed -- confirm against
  # DocType#initialize.
  def output(out, context)
    out.output_string "<!DOCTYPE #{@root_element_name} #{generate_content}>"
  end

  # Builds the external-identifier part: PUBLIC "id" or SYSTEM, followed by
  # the system identifier when there is one.
  def generate_content # :nodoc:
    result = ''
    if @public_identifier
      result << "PUBLIC \"#{@public_identifier}\""
    else
      result << "SYSTEM"
    end
    # Although a system identifier is not omissible in XML,
    # we cannot output it if it is not given.
    if @system_identifier
      # Use double quotes unless the identifier itself contains one.
      if /"/ !~ @system_identifier
        result << " \"#{@system_identifier}\""
      else
        result << " '#{@system_identifier}'"
      end
    end
    result
  end
end
class ProcIns
  # Writes the processing instruction as "<?target content?>". The content
  # and its separating space are omitted when @content is nil or false.
  # The "<?" opener is required; without it the target is emitted as plain
  # text followed by a stray "?>".
  def output(out, context)
    out.output_string "<?#{@target}"
    out.output_string " #{@content}" if @content
    out.output_string "?>"
  end
end
class Comment
  # Writes the comment between "<!--" and "-->".
  #
  # NOTE(review): the previous body wrote an empty string (the literal was
  # lost). The instance variable name @content is reconstructed -- confirm
  # against Comment#initialize.
  def output(out, context)
    out.output_string "<!--#{@content}-->"
  end
end
# :startdoc:
end
htree-0.8/htree/display.rb 0000644 0001750 0001750 00000003007 11747021106 014576 0 ustar jonas jonas require 'htree/output'
module HTree
module Node
# HTree::Node#display_xml prints the node as XML.
#
# The first optional argument, out ,
# specifies output target.
# It should respond to << .
# If it is not specified, $stdout is used.
#
# The second optional argument, encoding ,
# specifies output MIME charset (character encoding).
# If it is not specified, HTree::Encoder.internal_charset is used.
#
# HTree::Node#display_xml returns out .
def display_xml(out=$stdout, encoding=HTree::Encoder.internal_charset)
  xml_encoder = HTree::Encoder.new(encoding)
  self.output(xml_encoder, HTree::DefaultContext)
  # finish, not finish_with_xmldecl: the node already carries its own XML
  # declaration.
  serialized = xml_encoder.finish
  out << serialized
  out
end
# HTree::Node#display_html prints the node as HTML.
#
# The first optional argument, out ,
# specifies output target.
# It should respond to << .
# If it is not specified, $stdout is used.
#
# The second optional argument, encoding ,
# specifies output MIME charset (character encoding).
# If it is not specified, HTree::Encoder.internal_charset is used.
#
# HTree::Node#display_html returns out .
def display_html(out=$stdout, encoding=HTree::Encoder.internal_charset)
  html_encoder = HTree::Encoder.new(encoding)
  # Switch the encoder to HTML serialization before writing anything.
  html_encoder.html_output = true
  self.output(html_encoder, HTree::HTMLContext)
  serialized = html_encoder.finish
  out << serialized
  out
end
end
end
htree-0.8/htree/loc.rb 0000644 0001750 0001750 00000023010 11747021106 013702 0 ustar jonas jonas require 'htree/modules'
require 'htree/elem'
require 'htree/inspect'
module HTree
module Node
# Creates a root location (no parent, no index) that points at this node.
# The location class is the Loc constant of the node's own class.
def make_loc
  loc_class = self.class::Loc
  loc_class.new(nil, nil, self)
end
# A node is its own node: returns the receiver.
def to_node() self end
# +subst+ substitutes several subtrees at once.
#
# t = HTree(' ')
# l = t.make_loc
# t2 = t.subst({
# l.get_subnode(0, 'k') => 'v',
# l.get_subnode(0, -1) => HTree(' '),
# l.get_subnode(0, 1) => nil,
# l.get_subnode(0, 2, 0) => HTree(' '),
# })
# pp t2
# # =>
# # {emptyelem } {emptyelem } {elem {emptyelem }}}>
# Substitutes several subtrees at once and returns the resulting tree.
#
# +pairs+ maps locations (objects responding to index_list) under this node
# to replacement values; a non-array value is treated as a one-element array.
#
# Returns the single replacement when the top node itself is substituted,
# +self+ when +pairs+ is empty, otherwise the result of subst_internal.
#
# Raises ArgumentError when the top node is substituted together with one of
# its descendants, or when the top node would be replaced by zero or several
# nodes.
def subst(pairs)
  pairs = pairs.map {|key, val|
    key = key.index_list(self)
    unless Array === val
      val = [val]
    end
    [key, val]
  }
  # An empty index list means "this node itself".
  pairs_empty_key, pairs_nonempty_key =
    pairs.partition {|key, val| key.empty? }
  if !pairs_empty_key.empty?
    if !pairs_nonempty_key.empty?
      raise ArgumentError, "cannot substitute a node under substituting tree."
    end
    result = []
    pairs_empty_key.each {|key, val| result.concat val }
    result.compact!
    if result.length == 1
      return result[0]
    else
      # The message used to interpolate an undefined local, which turned this
      # ArgumentError into a NameError.
      raise ArgumentError, "cannot substitute top node by multiple nodes: #{result.inspect}"
    end
  end
  if pairs_nonempty_key.empty?
    return self
  end
  subst_internal(pairs)
end
# Recursive worker for subst. Each key in +pairs+ is an index list whose last
# element is the step to take from this node; that element is popped off
# (mutating the key), the pairs are grouped by step, and each group is either
# resolved here (remaining key empty) or delegated to the subnode.
def subst_internal(pairs) # :nodoc:
  grouped = {}
  pairs.each do |path, replacement|
    step = path.pop
    grouped[step] = [] unless grouped[step]
    grouped[step] << [path, replacement]
  end
  resolved = grouped.map do |step, rest|
    current = get_subnode(step)
    terminal, deeper = rest.partition {|path, _| path.empty? }
    if terminal.empty?
      # Nothing replaces this subnode directly: keep it or recurse into it.
      [step, deeper.empty? ? current : current.subst_internal(rest)]
    else
      unless deeper.empty?
        raise ArgumentError, "cannot substitute a node under substituting tree."
      end
      merged = []
      terminal.each {|_, replacement| merged.concat replacement }
      [step, merged.compact]
    end
  end
  subst_subnode(resolved)
end
end
# :stopdoc:
# node_test_string: the node-test text used for this kind of node when a
# location path is built (see find_loc_step).
class Doc
  def node_test_string
    'doc()'
  end
end
class Elem
  # Elements are tested by their qualified name.
  def node_test_string
    @stag.element_name.qualified_name
  end
end
class Text
  def node_test_string
    'text()'
  end
end
class BogusETag
  def node_test_string
    'bogus-etag()'
  end
end
class XMLDecl
  def node_test_string
    'xml-declaration()'
  end
end
class DocType
  def node_test_string
    'doctype()'
  end
end
class ProcIns
  def node_test_string
    'processing-instruction()'
  end
end
class Comment
  def node_test_string
    'comment()'
  end
end
module Container
  # Returns the location-path step for the child at +index+.
  #
  # An index outside the children yields "*[index]". Otherwise the step is
  # the child's node_test_string, with a 1-based "[n]" suffix when several
  # children share that node test. The steps are computed once and cached in
  # @loc_step_children; a fresh copy of the string is returned each time.
  def find_loc_step(index)
    return "*[#{index}]" unless 0 <= index && index < @children.length
    unless defined? @loc_step_children
      tests = @children.map {|child| child.node_test_string }
      totals = Hash.new(0)
      tests.each {|test| totals[test] += 1 }
      seen = Hash.new(0)
      @loc_step_children = tests.map {|test|
        seen[test] += 1
        totals[test] == 1 ? test : "#{test}[#{seen[test]}]"
      }
    end
    @loc_step_children[index].dup
  end
end
class Elem
  # Integer indexes address children and are handled by the inherited
  # implementation. A String is parsed as an attribute name in
  # DefaultContext; a Name is used as is. The step for an attribute is
  # "@" followed by its qualified name.
  #
  # Raises TypeError for any other kind of index.
  def find_loc_step(index)
    case index
    when Integer
      return super
    when String
      index = Name.parse_attribute_name(index, DefaultContext)
    end
    raise TypeError, "invalid index: #{index.inspect}" unless Name === index
    "@#{index.qualified_name}"
  end
end
# :startdoc:
end
class HTree::Location
# With a +parent+ location, the node is looked up as the parent's subnode at
# +index+ and must be the very object passed as +node+. Without a parent the
# location is a root pointing at +node+.
#
# Raises ArgumentError when the looked-up node is not +node+, or when this
# object's class is not the Loc class of the node's class.
def initialize(parent, index, node) # :nodoc:
  if !parent
    @parent = nil
    @index = nil
    @node = node
  else
    @parent = parent
    @index = index
    @node = parent.node.get_subnode(index)
    raise ArgumentError, "unexpected node" unless @node.equal?(node)
  end
  if @node
    expected_class = @node.class::Loc
    if self.class != expected_class
      raise ArgumentError, "invalid location class: #{self.class} should be #{node.class::Loc}"
    end
  end
  # Cache of child locations, keyed by index.
  @subloc = {}
end
attr_reader :parent, :index, :node
alias to_node node
# A location is already a location: returns the receiver.
def make_loc() self end
# +top+ returns the originator location.
#
# t = HTree('')
# l = t.make_loc.get_subnode(0, 0, 0, 0)
# p l, l.top
# # =>
# #
# #
def top
  # Follow parent links until a location without a parent is reached.
  cursor = self
  cursor = cursor.parent while cursor.parent
  cursor
end
# +subst_itself+ substitutes the node pointed by the location.
# It returns the location of substituted node.
#
# t1 = HTree('')
# p t1
# l1 = t1.make_loc.get_subnode(0, 0, 0, 0)
# p l1
# l2 = l1.subst_itself(HTree(' '))
# p l2
# t2 = l2.top.to_node
# p t2
# # =>
# # {elem {elem {emptyelem }}}}>
# #
# #
# # {elem {elem {emptyelem }}}}>
#
def subst_itself(node)
  # A root location is simply replaced by a location for the new node.
  return node.make_loc unless @parent

  # When this location points at nothing (no node at the index), the new node
  # lands at the front for a negative index and at the end for an index past
  # the last child, so the index to re-select must be adjusted.
  target_index = @index
  if !@node && Integer === @index
    if @index < 0
      target_index = 0
    else
      sibling_count = @parent.to_node.children.length
      target_index = sibling_count if sibling_count < @index
    end
  end
  rebuilt_parent = @parent.to_node.subst_subnode({@index=>node})
  @parent.subst_itself(rebuilt_parent).get_subnode(target_index)
end
# +subst+ substitutes several subtrees at once.
#
# t = HTree(' ')
# l = t.make_loc
# l2 = l.subst({
# l.root.get_subnode('k') => 'v',
# l.root.get_subnode(-1) => HTree(' '),
# l.find_element('y') => nil,
# l.find_element('z').get_subnode(0) => HTree(' '),
# })
# pp l2, l2.to_node
# # =>
# #
# # {emptyelem } {emptyelem } {elem {emptyelem }}}>
def subst(pairs)
  # Substitute inside the pointed node, then splice the result back in place.
  replaced_node = @node.subst(pairs)
  subst_itself(replaced_node)
end
# +loc_list+ returns an array containing from location's root to itself.
#
# t = HTree('')
# l = t.make_loc.get_subnode(0, 0, 0)
# pp l, l.loc_list
# # =>
# #
# [#,
# #,
# #,
# #]
#
def loc_list
  # Walk up through the parents, prepending each location, so the list runs
  # from the root down to the receiver.
  chain = []
  cursor = self
  while cursor
    chain.unshift(cursor)
    cursor = cursor.parent
  end
  chain
end
# +path+ returns the path of the location.
#
# l = HTree.parse("x ").make_loc
# l = l.get_subnode(0, 0, 0)
# p l.path # => "doc()/a/b[1]/text()"
def path
  # The root contributes its node test; every other location contributes
  # "/" plus the step its parent's node reports for its index.
  loc_list.inject('') do |text, loc|
    owner = loc.parent
    if owner
      text << '/' << owner.node.find_loc_step(loc.index)
    else
      text << loc.node.node_test_string
    end
  end
end
# Returns the indexes leading from this location up to +node+, innermost
# first. The result is empty when this location already points at +node+.
#
# Raises ArgumentError when +node+ is not this location's node or the node of
# one of its ancestors.
def index_list(node) # :nodoc:
  indexes = []
  cursor = self
  loop do
    return indexes if cursor.to_node.equal?(node)
    upper = cursor.parent
    break unless upper
    indexes << cursor.index
    cursor = upper
  end
  raise ArgumentError, "the location is not under the node: #{self.path}"
end
# :stopdoc:
# Pretty-prints the location as "#<ClassName: root-test/step/step...>", with
# an optional line break offered after each "/".
def pretty_print(q)
  opening = "#<#{self.class.name}"
  q.group(1, opening, '>') do
    q.text ':'
    q.breakable
    loc_list.each do |loc|
      owner = loc.parent
      unless owner
        q.text loc.node.node_test_string
        next
      end
      q.text '/'
      q.group { q.breakable '' }
      q.text owner.node.find_loc_step(loc.index)
    end
  end
end
alias inspect pretty_print_inspect
# :startdoc:
end
module HTree::Container::Loc
# +get_subnode+ returns a location object which points to a subnode
# indexed by _index_.
# Returns the location for the subnode at +index+, creating and caching it in
# @subloc on first use. An existing subnode gets a location of its own
# class's Loc type; a missing one gets a plain HTree::Location.
def get_subnode_internal(index) # :nodoc:
  unless @subloc.include? index
    child = @node.get_subnode(index)
    loc_class = child ? child.class::Loc : HTree::Location
    @subloc[index] = loc_class.new(self, index, child)
  end
  @subloc[index]
end
# +subst_subnode+ returns the location which refers the substituted tree.
# loc.subst_subnode(pairs) -> loc
#
# t = HTree('')
# l = t.make_loc.get_subnode(0, 0)
# l = l.subst_subnode({0=>HTree(' ')})
# pp t, l.top.to_node
# # =>
# # {elem {emptyelem }}}>
# # {elem {emptyelem }}}>
#
def subst_subnode(pairs)
  # Rebuild the pointed node with the substitutions applied, then splice it
  # back into the tree at this location.
  rebuilt = @node.subst_subnode(pairs)
  self.subst_itself(rebuilt)
end
# +children+ returns an array of child locations.
def children
  # One location per child of the pointed node, in child order.
  child_count = @node.children.length
  Array.new(child_count) {|i| get_subnode(i) }
end
end
class HTree::Elem::Loc
  # The following readers forward to the element this location points at.
  def context
    @node.context
  end

  # +element_name+ returns the element's name as a Name object.
  def element_name
    @node.element_name
  end

  def empty_element?
    @node.empty_element?
  end

  # +each_attribute+ yields each attribute name together with the location
  # of that attribute (not its text).
  def each_attribute
    @node.each_attribute do |attr_name, _attr_text|
      yield attr_name, get_subnode(attr_name)
    end
  end
end
class HTree::Text::Loc
  # Each method forwards to the text node this location points at.
  def to_s
    @node.to_s
  end

  def strip
    @node.strip
  end

  def empty?
    @node.empty?
  end
end
htree-0.8/htree/gencode.rb 0000644 0001750 0001750 00000011105 11747021106 014533 0 ustar jonas jonas require 'htree/encoder'
require 'htree/output'
# :stopdoc:
module HTree
module Node
# Returns Ruby source text that, when evaluated, writes this node to the
# output object named by +outvar+, using the context named by +contextvar+.
def generate_xml_output_code(outvar='out', contextvar='top_context')
  # Start from a private copy of the default namespaces whose lookup default
  # is nil.
  default_ns = HTree::Context::DefaultNamespaces.dup
  default_ns.default = nil
  base_context = Context.new(default_ns)
  generator = HTree::GenCode.new(outvar, contextvar)
  output(generator, base_context)
  generator.finish
end
end
# Output target that, instead of producing markup, accumulates Ruby source
# code which produces that markup when run. Consecutive literal pieces are
# merged in @buffer and emitted as a single output_string / output_text call.
class GenCode
  # +outvar+ and +contextvar+ are the names, as source text, of the output
  # and context variables the generated code refers to.
  def initialize(outvar, contextvar, internal_encoding=Encoder.internal_charset)
    @outvar = outvar
    @contextvar = contextvar
    # :none, :string or :text -- the kind of literal currently in @buffer.
    @state = :none
    @buffer = ''
    @internal_encoding = internal_encoding
    @code = ''
    @html_output = nil
  end
  attr_reader :outvar, :contextvar

  def html_output?
    @html_output
  end

  def html_output=(flag)
    @html_output = flag
  end

  # Stand-in output object used by generated code while the content of a
  # CDATA element is produced: it accepts text only and collects it so that
  # +result+ can validate and return the raw content.
  class CDATABuffer
    def initialize
      @buf = ''
    end

    def html_output?
      true
    end

    def not_valid_for_html_cdata(*args)
      raise ArgumentError, "CDATA content only accept texts."
    end
    alias output_slash_if_xml not_valid_for_html_cdata
    alias output_cdata_content not_valid_for_html_cdata
    alias output_dynamic_attvalue not_valid_for_html_cdata

    def output_string(string)
      @buf << string
    end

    def output_text(string)
      @buf << string
    end

    # Character references for markup-significant characters.
    ChRef = {
      '&' => '&amp;',
      '<' => '&lt;',
      '>' => '&gt;',
      '"' => '&quot;',
    }

    # Appends +string+ in reference-encoded form. An object responding to
    # rcdata is taken as already "&"-encoded, so only "<" and ">" are
    # replaced; anything else is converted with to_s and fully escaped.
    def output_dynamic_text(string)
      if string.respond_to? :rcdata
        @buf << string.rcdata.gsub(/[<>]/) { ChRef[$&] }
      else
        @buf << string.to_s.gsub(/[&<>]/) { ChRef[$&] }
      end
    end

    # Returns the collected content decoded by HTree::Text.parse_pcdata.
    #
    # Raises ArgumentError when a literal "<" or ">" was collected (something
    # other than text was written), or when the decoded text contains "</",
    # which would end the enclosing element early.
    def result
      if %r{[<>]} =~ @buf
        raise ArgumentError, "cdata contains non-text : #{@buf.inspect}"
      end
      str = HTree::Text.parse_pcdata(@buf).to_s
      if %r{</} =~ str
        raise ArgumentError, "cdata contains '</' : #{str.inspect}"
      end
      str
    end
  end

  # Emits code that routes the output of +content+ through
  # output_cdata_content_do of the runtime output object, temporarily
  # rebinding the output variable to a CDATABuffer.
  def output_cdata_content(content, context)
    tmp_outvar = @outvar + '_tmp'
    output_logic_line "#{@outvar} = #{@outvar}.output_cdata_content_do(#{@outvar},"
    output_logic_line "lambda { #{@outvar} = HTree::GenCode::CDATABuffer.new },"
    output_logic_line "lambda {"
    content.each {|n| n.output(self, context) }
    output_logic_line "},"
    output_logic_line "lambda {|#{tmp_outvar}| #{tmp_outvar}.output_string(#{@outvar}.result) })"
  end

  def output_slash_if_xml
    output_logic_line "#{@outvar}.output_slash_if_xml"
  end

  # The output_dynamic_* methods take +expr+ as Ruby source text and emit a
  # call that evaluates it when the generated code runs.
  def output_dynamic_text(expr)
    flush_buffer
    @code << "#{@outvar}.output_dynamic_text((#{expr}))\n"
  end

  def output_dynamic_tree(expr, context_expr)
    flush_buffer
    @code << "(#{expr}).output(#{@outvar}, #{context_expr})\n"
  end

  def output_dynamic_attvalue(expr)
    flush_buffer
    @code << "#{@outvar}.output_dynamic_attvalue((#{expr}))\n"
  end

  # Appends one line of generated code verbatim, after flushing any pending
  # literal.
  def output_logic_line(line)
    flush_buffer
    @code << line << "\n"
  end

  # Queues a literal to be written with output_string.
  def output_string(str)
    return if str.empty?
    if @state != :string
      flush_buffer
      @state = :string
    end
    @buffer << str
  end

  # Queues a literal to be written with output_text.
  def output_text(str)
    return if str.empty?
    if /\A[\s\x21-\x7e]+\z/ =~ str && @state == :string
      # Assumption: external charset can represent white spaces and
      # ASCII printable.
      output_string(str)
      return
    end
    if @state != :text
      flush_buffer
      @state = :text
    end
    @buffer << str
  end

  # Character references used for namespace URIs in generated xmlns
  # attributes.
  ChRef = {
    '&' => '&amp;',
    '>' => '&gt;',
    '<' => '&lt;',
    '"' => '&quot;',
  }

  # Emits, for each prefix/URI pair, code that writes the xmlns attribute
  # only when the runtime context does not already bind the prefix to that
  # URI. A nil prefix stands for the default namespace.
  def output_xmlns(namespaces)
    unless namespaces.empty?
      flush_buffer
      namespaces.each {|k, v|
        if k
          ks = k.dump
          aname = "xmlns:#{k}"
        else
          ks = "nil"
          aname = "xmlns"
        end
        @code << "if #{@contextvar}.namespace_uri(#{ks}) != #{v.dump}\n"
        output_string " #{aname}=\""
        output_text v.gsub(/[&<>"]/) {|s| ChRef[s] }
        output_string '"'
        flush_buffer
        @code << "end\n"
      }
    end
  end

  # Turns the pending literal, if any, into one generated call.
  def flush_buffer
    return if @buffer.empty?
    case @state
    when :string
      @code << "#{@outvar}.output_string #{@buffer.dump}\n"
      @buffer = ''
    when :text
      @code << "#{@outvar}.output_text #{@buffer.dump}\n"
      @buffer = ''
    end
  end

  # Flushes the pending literal and returns the generated source.
  def finish
    flush_buffer
    @code
  end
end
end
# :startdoc:
htree-0.8/htree/doc.rb 0000644 0001750 0001750 00000010461 11747021106 013700 0 ustar jonas jonas require 'htree/modules'
require 'htree/container'
module HTree
class Doc
# :stopdoc:
class << self
alias new! new
end
# :startdoc:
# The arguments should be a sequence of follows.
# [String object] specified string is converted to HTree::Text.
# [HTree::Node object] used as a child.
# [HTree::Doc object]
# used as children.
# It is expanded except HTree::XMLDecl and HTree::DocType objects.
# [Array of String, HTree::Node and HTree::Doc] used as children.
#
def Doc.new(*args)
  children = []
  # Adds one already-unwrapped item to +children+. +reported+ is the value
  # shown in the error message for an unsupported item.
  absorb = lambda {|item, reported|
    case item
    when HTree::Doc
      # A nested document contributes its children, minus XML declaration and
      # document type nodes.
      children.concat(item.children.reject {|c|
        HTree::XMLDecl === c || HTree::DocType === c
      })
    when HTree::Node
      children << item
    when String
      children << Text.new(item)
    else
      raise TypeError, "unexpected argument: #{reported.inspect}"
    end
  }
  args.each {|arg|
    arg = arg.to_node if HTree::Location === arg
    if Array === arg
      # Arrays are expanded one level only.
      arg.each {|a|
        a = a.to_node if HTree::Location === a
        absorb.call(a, arg)
      }
    else
      absorb.call(arg, arg)
    end
  }
  new!(children)
end
# Stores a frozen copy of +children+.
#
# Raises TypeError, listing the offenders, when any child is not an
# HTree::Node or is itself an HTree::Doc.
def initialize(children=[]) # :notnew:
  @children = children.dup.freeze
  rejected = @children.reject {|c| c.kind_of?(HTree::Node) and !c.kind_of?(HTree::Doc) }
  unless rejected.empty?
    listing = rejected.map {|uc| uc.inspect }.join(', ')
    raise TypeError, "Unacceptable document child: #{listing}"
  end
end
# Returns the child at +index+, or nil when the index is negative or past
# the last child. Raises TypeError for a non-Integer index.
def get_subnode_internal(index) # :nodoc:
  raise TypeError, "invalid index: #{index.inspect}" unless Integer === index
  return nil if index < 0
  return nil if @children.length <= index
  @children[index]
end
# doc.subst_subnode(pairs) -> doc
#
# The argument _pairs_ should be a hash or an assocs.
# Its key should be an integer which means an index for children.
#
# Its value should be one of follows.
# [HTree::Node object] specified object is used as is.
# [String object] specified string is converted to HTree::Text
# [Array of above] specified HTree::Node and String is used in that order.
# [nil] delete corresponding node.
#
# d = HTree(' ')
# p d.subst_subnode({0=>HTree(' '), 2=>HTree(' ')})
# p d.subst_subnode([[0,HTree(' ')], [2,HTree(' ')]])
# # =>
# #} {emptyelem } {emptyelem }>
# #} {emptyelem } {emptyelem }>
#
def subst_subnode(pairs)
  # Pass 1: normalize every value to an array of nodes, merged per index.
  replacements = {}
  pairs.each do |index, value|
    raise TypeError, "invalid index: #{index.inspect}" unless Integer === index
    value = value.to_node if HTree::Location === value
    nodes =
      case value
      when Node, String then [value]
      when Array then value.dup
      when nil then []
      else raise TypeError, "invalid value: #{value.inspect}"
      end
    nodes.map! do |v|
      v = v.to_node if HTree::Location === v
      case v
      when Node then v
      when String then Text.new(v)
      else raise TypeError, "invalid value: #{v.inspect}"
      end
    end
    (replacements[index] ||= []).concat nodes
  end

  # Pass 2: negative indexes prepend, indexes past the end append, all
  # others replace the child at that position (an empty array deletes it).
  before = []
  body = @children.dup
  after = []
  replacements.keys.sort.each do |index|
    nodes = replacements[index]
    if index < 0
      before << nodes
    elsif body.length <= index
      after << nodes
    else
      body[index] = nodes
    end
  end
  Doc.new([before, body, after].flatten.compact)
end
end
end
htree-0.8/htree/encoder.rb 0000644 0001750 0001750 00000023126 11747021106 014554 0 ustar jonas jonas if !"".respond_to?(:encode)
require 'iconv'
end
module HTree
# Pass-through converter used when source and destination encodings are the
# same. It offers the subset of the converter interface that Encoder calls.
class DummyEncodingConverter
  def initialize(encoding)
    @encoding = encoding
  end

  # Moves all of +src+ to the end of +dst+, leaving +src+ empty. The
  # remaining parameters are accepted and ignored.
  def primitive_convert(src, dst, destination_buffer=nil, destination_byteoffset=nil, destination_bytesize=nil, opts=nil)
    dst.concat(src)
    src.clear
    :source_buffer_empty
  end

  # Returns +str+ itself, unconverted.
  def convert(str) str end

  # There is never any pending output.
  def finish() "" end
end
class Encoder
# HTree::Encoder.internal_charset returns the MIME charset name used for
# strings inside the program.
#
# When the Encoding constant exists, this is the name of
# Encoding.default_external. Otherwise it is derived from $KCODE:
#
# - 'ISO-8859-1' when $KCODE=='NONE'
# - 'UTF-8' when $KCODE=='UTF8'
# - 'EUC-JP' when $KCODE=='EUC'
# - 'Shift_JIS' when $KCODE=='SJIS'
#
# The $KCODE mapping ignores EUC-KR and the single byte charsets other than
# ISO-8859-1.
def Encoder.internal_charset
  return KcodeCharset[$KCODE] unless Object.const_defined? :Encoding
  Encoding.default_external.name
end
# Sets up a converter from +internal_encoding+ to +output_encoding+, plus one
# converter per candidate sub-charset of the output encoding. Those are used
# to find out whether the output also fits a smaller charset.
def initialize(output_encoding, internal_encoding=HTree::Encoder.internal_charset)
  @buf = ''
  @internal_encoding = internal_encoding
  @output_encoding = output_encoding
  # Builds the converter from the internal encoding to +target+: Iconv when
  # Encoding::Converter does not exist, a pass-through when nothing needs
  # converting, a real converter otherwise.
  converter_to = lambda {|target|
    if !defined?(Encoding::Converter)
      Iconv.new(target, @internal_encoding)
    elsif @internal_encoding == target
      DummyEncodingConverter.new(@internal_encoding)
    else
      Encoding::Converter.new(@internal_encoding, target)
    end
  }
  @ic = converter_to.call(output_encoding)
  @charpat = FirstCharPattern[internal_encoding]
  @subcharset_list = SubCharset[output_encoding] || []
  @subcharset_ic = {}
  @subcharset_list.each {|subcharset|
    @subcharset_ic[subcharset] = converter_to.call(subcharset)
  }
  @html_output = false
end
# :stopdoc:
# Whether output is serialized as HTML rather than XML.
def html_output?() @html_output end

# Turns HTML serialization on or off.
def html_output=(flag)
  @html_output = flag
end
# Runs +body+. In HTML mode it is bracketed by +pre+ (called without
# arguments) and +post+ (called with +out+). Always returns +out+.
def output_cdata_content_do(out, pre, body, post)
  unless @html_output
    body.call
    return out
  end
  pre.call
  body.call
  post.call(out)
  out
end
# Writes the "/" of an empty-element tag, except in HTML mode.
def output_slash_if_xml
  output_string('/') unless @html_output
end
# Writes the children of a CDATA element. In HTML mode only the HTree::Text
# children are kept; they are concatenated and written raw through
# Text#output_cdata. Otherwise every child is written normally.
def output_cdata_content(content, context)
  return content.each {|node| node.output(self, context) } unless @html_output
  # xxx: should raise an error for non-text node?
  text_nodes = content.grep(HTree::Text)
  merged = HTree::Text.concat(*text_nodes)
  merged.output_cdata(self)
end
# Joins +args+ and writes the result raw, as the content of an HTML CDATA
# element.
#
# Raises ArgumentError when the joined text contains "</", which would end
# the enclosing element early. The check used to be an empty pattern, which
# matches every string and rejected all input.
def output_cdata_for_html(*args)
  str = args.join('')
  if %r{</} =~ str
    raise ArgumentError, "cdata contains '</' : #{str.inspect}"
  end
  output_string str
end
# Appends +internal_str+ to the output buffer. +external_str+ is its
# already-converted form; when omitted it is produced with @ic.
#
# Every sub-charset converter is also fed +internal_str+; a converter that
# fails or yields a different result is dropped from @subcharset_ic.
# Returns nil.
def output_string(internal_str, external_str=nil)
  external_str ||= @ic.respond_to?(:convert) ? @ic.convert(internal_str) : @ic.iconv(internal_str)
  if @buf.empty? && @buf.respond_to?(:force_encoding) # xxx: should be fixed Ruby itself
    @buf.force_encoding(external_str.encoding)
  end
  @buf << external_str
  @subcharset_ic.reject! do |_subcharset, sub_ic|
    if sub_ic.respond_to? :convert
      begin
        sub_ic.convert(internal_str) != external_str
      rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
        true
      end
    else
      begin
        sub_ic.iconv(internal_str) != external_str
      rescue Iconv::Failure
        true
      end
    end
  end
  nil
end
# Writes +string+ as text in the output encoding. Characters the output
# encoding cannot represent are replaced: by a numeric character reference
# when the character can be identified, by "?" otherwise.
def output_text(string)
  if string.respond_to? :encode
    # Bring the string to the internal encoding, then work on raw bytes.
    if string.encoding != Encoding::US_ASCII &&
       string.encoding.to_s != @internal_encoding
      string = string.encode(@internal_encoding)
    end
    string = string.dup.force_encoding("ASCII-8BIT")
  end
  while true
    if @ic.respond_to? :convert
      # Converter path: every branch below either restarts the loop with the
      # unconverted remainder or returns.
      if string
        src = string.dup
        res = @ic.primitive_convert(src, dst="", nil, nil, :partial_input => true)
      else
        res = @ic.primitive_convert(nil, dst="")
      end
      case res
      when :invalid_byte_sequence
        success = dst
        failed = src
        _, _, _, error_bytes, _ = @ic.primitive_errinfo
        preconv_bytesize = string.bytesize - failed.bytesize - error_bytes.bytesize
        output_string string[0, preconv_bytesize], success
        string = @ic.putback + failed
        output_string '?'
        next
      when :undefined_conversion
        success = dst
        failed = src
        _, enc1, _, error_bytes, _ = @ic.primitive_errinfo
        preconv_bytesize = string.bytesize - failed.bytesize - error_bytes.bytesize
        output_string string[0, preconv_bytesize], success
        string = @ic.putback + failed
        output_string error_bytes.encode('US-ASCII', enc1, :xml=>:text)
        next
      when :source_buffer_empty, :finished
        output_string string, dst
        return
      else
        raise "unexpected encoding converter result: #{res}"
      end
    else
      # Iconv path: on failure, write what converted and fall through to
      # replace the first offending character.
      begin
        output_string string, @ic.iconv(string)
        return
      rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => e
        success = e.success
        failed = e.failed
      end
      output_string string[0, string.length - failed.length], success
    end
    if FirstCharPattern[@internal_encoding] !~ failed
      # xxx: should be configulable?
      #raise ArgumentError, "cannot extract first character: #{e.failed.dump}"
      string = failed[1, failed.length-1]
      output_string '?'
    else
      char = $&
      rest = $'
      begin
        if char.respond_to? :encode
          excs = [Encoding::UndefinedConversionError,
                  Encoding::InvalidByteSequenceError]
          ucode = char.encode("UTF-8", @internal_encoding).unpack("U")[0]
        else
          excs = [Iconv::IllegalSequence, Iconv::InvalidCharacter]
          ucode = Iconv.conv("UTF-8", @internal_encoding, char).unpack("U")[0]
        end
        # Decimal numeric character reference. The "&#" prefix is required;
        # without it only "<digits>;" is written.
        char = "&##{ucode};"
      rescue *excs
        # xxx: should be configulable?
        char = '?'
      end
      output_string char
      string = rest
    end
  end
end
# Character references for markup-significant characters. Each character
# must map to its reference; mapping it to itself leaves dynamic text
# unescaped.
ChRef = {
  '&' => '&amp;',
  '<' => '&lt;',
  '>' => '&gt;',
  '"' => '&quot;',
}

# Writes a runtime value as element content. An object responding to rcdata
# is taken as already "&"-encoded, so only "<" and ">" are replaced;
# anything else is converted with to_s and has "&", "<" and ">" replaced.
def output_dynamic_text(string)
  if string.respond_to? :rcdata
    output_text(string.rcdata.gsub(/[<>]/) {|ch| ChRef[ch] })
  else
    output_text(string.to_s.gsub(/[&<>]/) {|ch| ChRef[ch] })
  end
end

# Same as output_dynamic_text, for use inside a double-quoted attribute
# value: the double quote is replaced as well.
def output_dynamic_attvalue(string)
  if string.respond_to? :rcdata
    output_text(string.rcdata.gsub(/[<>"]/) {|ch| ChRef[ch] })
  else
    output_text(string.to_s.gsub(/[&<>"]/) {|ch| ChRef[ch] })
  end
end
# :startdoc:
# Flushes the main converter, appends whatever it still held to the buffer
# and returns the buffer. Sub-charset converters whose flush fails or
# differs from the main one are dropped from @subcharset_ic.
def finish
  tail = @ic.respond_to?(:finish) ? @ic.finish : @ic.close
  @buf << tail
  @subcharset_ic.reject! do |_subcharset, sub_ic|
    if sub_ic.respond_to? :finish
      begin
        sub_ic.finish != tail
      rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
        true
      end
    else
      begin
        sub_ic.close != tail
      rescue Iconv::Failure
        true
      end
    end
  end
  @buf
end
# Finishes the output and returns it prefixed with an XML declaration naming
# minimal_charset as the encoding. The declaration is converted from
# US-ASCII to the output encoding before being prepended.
#
# NOTE(review): the declaration literal was empty in the previous body (lost
# from this copy), so nothing was prepended. The text below is reconstructed;
# confirm the exact form against upstream.
def finish_with_xmldecl
  content = finish
  str = "<?xml version=\"1.0\" encoding=\"#{minimal_charset}\"?>"
  if str.respond_to? :encode
    xmldecl = str.encode(@output_encoding, 'US-ASCII')
  else
    xmldecl = Iconv.conv(@output_encoding, 'US-ASCII', str)
  end
  xmldecl + content
end
# Returns the first candidate sub-charset whose converter is still in
# @subcharset_ic, or the output encoding when none is left.
def minimal_charset
  surviving = @subcharset_list.find {|subcharset| @subcharset_ic.include? subcharset }
  surviving || @output_encoding
end
# :stopdoc:
# $KCODE value => MIME charset name; used by Encoder.internal_charset when
# the Encoding constant is not available.
KcodeCharset = {
'EUC' => 'EUC-JP',
'SJIS' => 'Shift_JIS',
'UTF8' => 'UTF-8',
'NONE' => 'ISO-8859-1',
}
# Charset name => byte-oriented (/n) pattern matching one character in that
# charset.
SingleCharPattern = {
'EUC-JP' => /(?:
[\x00-\x7f]
|[\xa1-\xfe][\xa1-\xfe]
|\x8e[\xa1-\xfe]
|\x8f[\xa1-\xfe][\xa1-\xfe])/nx,
'Shift_JIS' => /(?:
[\x00-\x7f]
|[\x81-\x9f][\x40-\x7e\x80-\xfc]
|[\xa1-\xdf]
|[\xe0-\xfc][\x40-\x7e\x80-\xfc])/nx,
'UTF-8' => /(?:
[\x00-\x7f]
|[\xc0-\xdf][\x80-\xbf]
|[\xe0-\xef][\x80-\xbf][\x80-\xbf]
|[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]
|[\xf8-\xfb][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf]
|[\xfc-\xfd][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf])/nx,
'ISO-8859-1' => /[\x00-\xff]/n
}
# The same patterns anchored at the start of the string; output_text uses
# them to split off the first character of text that failed to convert.
FirstCharPattern = {}
SingleCharPattern.each {|charset, pat|
FirstCharPattern[charset] = /\A#{pat}/
}
# Output charset => smaller charsets to try, in order of preference (see
# minimal_charset). Charsets not listed fall back to the default set below.
SubCharset = {
'ISO-2022-JP-2' => ['US-ASCII', 'ISO-2022-JP'],
'ISO-2022-JP-3' => ['US-ASCII', 'ISO-2022-JP'],
'UTF-16BE' => [],
'UTF-16LE' => [],
'UTF-16' => [],
}
SubCharset.default = ['US-ASCII']
end
end
htree-0.8/htree/extract_text.rb 0000644 0001750 0001750 00000000776 11747021106 015661 0 ustar jonas jonas require 'htree/text'
require 'htree/doc'
require 'htree/elem'
module HTree
  module Node
    # Returns the text of the node as a Text object. Placeholder: the node
    # kinds below supply the real implementations.
    def extract_text
      raise NotImplementedError
    end
  end

  class Location
    # The text of the node this location points at.
    def extract_text
      pointed = to_node
      pointed.extract_text
    end
  end

  # :stopdoc:
  module Container
    # Concatenation of the text extracted from every child, in order.
    def extract_text
      pieces = @children.map {|child| child.extract_text }
      Text.concat(*pieces)
    end
  end

  module Leaf
    # Leaves contribute no text.
    def extract_text
      Text.new('')
    end
  end

  class Text
    # A text node is its own extracted text.
    def extract_text
      self
    end
  end
  # :startdoc:
end
htree-0.8/htree/inspect.rb 0000644 0001750 0001750 00000004514 11747021106 014602 0 ustar jonas jonas require 'pp'
require 'htree/doc'
require 'htree/elem'
require 'htree/leaf'
require 'htree/tag'
require 'htree/output'
require 'htree/raw_string'
module HTree
# :stopdoc:
class Doc
  # Pretty-prints as "#<ClassName child child ...>".
  def pretty_print(q)
    q.object_group(self) do
      @children.each do |child|
        q.breakable
        q.pp child
      end
    end
  end
  alias inspect pretty_print_inspect
end
class Elem
  # Pretty-prints as "{emptyelem stag}" for an empty element, otherwise as
  # "{elem stag child... etag}" (the end tag only when there is one).
  def pretty_print(q)
    opener = @empty ? '{emptyelem' : "{elem"
    q.group(1, opener, '}') do
      q.breakable
      q.pp @stag
      unless @empty
        @children.each do |child|
          q.breakable
          q.pp child
        end
        if @etag
          q.breakable
          q.pp @etag
        end
      end
    end
  end
  alias inspect pretty_print_inspect
end
module Leaf
  # Pretty-prints as "{kind ...}" where kind is the downcased class name
  # without its namespace. The raw source string is shown line by line when
  # there is one; otherwise the XML serialization is shown if the object can
  # produce it.
  def pretty_print(q)
    q.group(1, '{', '}') do
      kind = self.class.name.sub(/.*::/,'').downcase
      q.text kind
      if rs = @raw_string
        rs.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) do |line|
          q.breakable
          q.pp line
        end
      elsif self.respond_to? :display_xml
        q.breakable
        q.text self.display_xml('')
      end
    end
  end
  alias inspect pretty_print_inspect
end
class Name
  # Compact textual form:
  #   xmlns / xmlns:local   for namespace declarations
  #   local                 when there is no namespace URI
  #   prefix{uri}local      when a prefix is set
  #   -{uri}local           when the prefix is explicitly false
  #   {uri}local            otherwise
  def inspect
    return(@local_name ? "xmlns:#{@local_name}" : "xmlns") if xmlns?
    return @local_name if !@namespace_uri || @namespace_uri.empty?
    braced = "{#{@namespace_uri}}#{@local_name}"
    if @namespace_prefix
      "#{@namespace_prefix}#{braced}"
    elsif @namespace_prefix == false
      "-#{braced}"
    else
      braced
    end
  end
end
class STag
  # Pretty-prints the start tag as "<name attr="value" ...>", using the
  # inspect form of each name and the attribute-value content of each text.
  def pretty_print(q)
    q.group(1, '<', '>') do
      q.text(@name.inspect)
      @attributes.each do |attr_name, attr_text|
        q.breakable
        rendered = "#{attr_name.inspect}=\"#{attr_text.to_attvalue_content}\""
        q.text(rendered)
      end
    end
  end

  alias inspect pretty_print_inspect
end
class ETag
  # Pretty-prints the end tag as "</qualified_name>".
  #
  # The group opener has to be "</" so the output is balanced against the
  # closing ">" and is distinguishable from the "<...>" form printed for a
  # start tag; with an empty opener an end tag rendered as "name>".
  def pretty_print(q)
    q.group(1, '</', '>') {
      q.text @qualified_name
    }
  end
  alias inspect pretty_print_inspect
end
class BogusETag
  # Pretty-prints a stray end tag as "{bogusetag <text>}", where <text> is
  # the recorded raw string when there is one and "</qualified_name>"
  # otherwise.
  def pretty_print(q)
    q.group(1, '{', '}') {
      q.text self.class.name.sub(/.*::/,'').downcase
      # Both branches separate the class label from the tag text.
      q.breakable
      if rs = @raw_string
        q.text rs
      else
        # Rebuild the tag from its name, including the "</" opener.
        q.text "</#{@qualified_name}>"
      end
    }
  end
end
# :startdoc:
end
htree-0.8/htree/name.rb 0000644 0001750 0001750 00000007277 11747021106 014066 0 ustar jonas jonas require 'htree/scan' # for Pat::Nmtoken
require 'htree/context'
module HTree
# Name represents an element name or an attribute name.
# It consists of a namespace prefix, a namespace URI and a local name.
class Name
  # How the textual forms map onto the three components:
  #
  #   element name                    prefix    uri   localname
  #   {u}n, n with xmlns=u            nil       'u'   'n'
  #   p{u}n, p:n with xmlns:p=u       'p'       'u'   'n'
  #   n with xmlns=''                 nil       ''    'n'
  #
  #   attribute name
  #   xmlns=                          'xmlns'   nil   nil
  #   xmlns:n=                        'xmlns'   nil   'n'
  #   p{u}n=, p:n= with xmlns:p=u     'p'       'u'   'n'
  #   n=                              nil       ''    'n'

  # Parses +name+ as an element name.
  #
  # name::    "{uri}local", "prefix{uri}local", "prefix:local" or "local".
  # context:: object answering namespace_uri(prefix); nil asks for the
  #           default namespace.
  #
  # A prefix (or default namespace) for which the context returns nil or ''
  # is treated as undeclared: the whole string then becomes the local name
  # of a Name without namespace.
  def Name.parse_element_name(name, context)
    braced = /\{(.*)\}/.match(name)
    if braced
      # "{u}n" means "use default namespace",
      # "p{u}n" means "use the specified prefix p"
      prefix = braced.pre_match
      return Name.new(prefix == '' ? nil : prefix, braced[1], braced.post_match)
    end

    colon = /:/.match(name)
    if colon
      uri = context.namespace_uri(colon.pre_match)
      # namespace_uri may answer nil for an unknown prefix, so test for nil
      # before asking whether the URI is empty.
      return Name.new(colon.pre_match, uri, colon.post_match) if uri && !uri.empty?
    end

    default_uri = context.namespace_uri(nil)
    if default_uri && !default_uri.empty?
      Name.new(nil, default_uri, name)
    else
      Name.new(nil, '', name)
    end
  end

  # Parses +name+ as an attribute name.
  #
  # "xmlns" and "xmlns:local" produce namespace-declaration names; the
  # braced and prefixed forms work as in parse_element_name, except that an
  # unprefixed attribute never picks up the default namespace.
  def Name.parse_attribute_name(name, context)
    return Name.new('xmlns', nil, nil) if name == 'xmlns'

    declaration = /\Axmlns:/.match(name)
    return Name.new('xmlns', nil, declaration.post_match) if declaration

    braced = /\{(.*)\}/.match(name)
    if braced
      prefix = braced.pre_match
      return Name.new(prefix == '' ? nil : prefix, braced[1], braced.post_match)
    end

    colon = /:/.match(name)
    if colon
      uri = context.namespace_uri(colon.pre_match)
      # Same nil-tolerant check as in parse_element_name.
      return Name.new(colon.pre_match, uri, colon.post_match) if uri && !uri.empty?
    end

    Name.new(nil, '', name)
  end

  # Interning table used by Name.new, keyed by
  # [namespace_prefix, namespace_uri, local_name, class].
  NameCache = {}

  # Returns the cached Name for the given components, creating it on first
  # use. The string components are duplicated and frozen before the new
  # instance is built and stored.
  def Name.new(namespace_prefix, namespace_uri, local_name)
    key = [namespace_prefix, namespace_uri, local_name, self]
    NameCache.fetch(key) {
      0.upto(2) {|i| key[i] = key[i].dup.freeze if key[i] }
      NameCache[key] = super(key[0], key[1], key[2])
    }
  end

  # Stores the components after validating them.
  #
  # Raises HTree::Error when the prefix or local name is not an Nmtoken,
  # when an 'xmlns' name carries a namespace URI, or when any other name
  # has a namespace URI that is not a String.
  def initialize(namespace_prefix, namespace_uri, local_name)
    @namespace_prefix = namespace_prefix
    @namespace_uri = namespace_uri
    @local_name = local_name
    if @namespace_prefix && /\A#{Pat::Nmtoken}\z/o !~ @namespace_prefix
      raise HTree::Error, "invalid namespace prefix: #{@namespace_prefix.inspect}"
    end
    if @local_name && /\A#{Pat::Nmtoken}\z/o !~ @local_name
      raise HTree::Error, "invalid local name: #{@local_name.inspect}"
    end
    if @namespace_prefix == 'xmlns'
      unless @namespace_uri == nil
        raise HTree::Error, "Name object for xmlns:* must not have namespace URI: #{@namespace_uri.inspect}"
      end
    else
      unless String === @namespace_uri
        raise HTree::Error, "invalid namespace URI: #{@namespace_uri.inspect}"
      end
    end
  end

  attr_reader :namespace_prefix, :namespace_uri, :local_name

  # True for the namespace-declaration names ("xmlns" and "xmlns:local").
  def xmlns?
    @namespace_prefix == 'xmlns' && @namespace_uri == nil
  end

  # "{uri}local" when the name has a non-empty namespace URI, otherwise a
  # copy of the local name.
  def universal_name
    if @namespace_uri && !@namespace_uri.empty?
      "{#{@namespace_uri}}#{@local_name}"
    else
      @local_name.dup
    end
  end

  # "prefix:local" when a namespace URI and a prefix are both present,
  # a copy of the local name when there is one, and "xmlns" otherwise.
  def qualified_name
    if @namespace_uri && !@namespace_uri.empty?
      if @namespace_prefix
        "#{@namespace_prefix}:#{@local_name}"
      else
        @local_name.dup
      end
    elsif @local_name
      @local_name.dup
    else
      "xmlns"
    end
  end

  # "prefix{uri}local" or "{uri}local" for namespaced names, a copy of the
  # local name when there is one, and "xmlns" otherwise.
  def to_s
    if @namespace_uri && !@namespace_uri.empty?
      if @namespace_prefix
        "#{@namespace_prefix}{#{@namespace_uri}}#{@local_name}"
      else
        "{#{@namespace_uri}}#{@local_name}"
      end
    elsif @local_name
      @local_name.dup
    else
      "xmlns"
    end
  end
end
end
htree-0.8/htree/parse.rb 0000644 0001750 0001750 00000030700 11747021106 014243 0 ustar jonas jonas require 'htree/scan'
require 'htree/htmlinfo'
require 'htree/text'
require 'htree/tag'
require 'htree/leaf'
require 'htree/doc'
require 'htree/elem'
require 'htree/raw_string'
require 'htree/context'
require 'htree/encoder'
require 'htree/fstr'
module HTree
# HTree.parse parses input and returns a document tree
# represented by HTree::Doc.
#
# input should be a String or
# an object which responds to the read or open method.
# For example, IO, StringIO, Pathname, URI::HTTP and URI::FTP are acceptable.
# Note that the URIs need open-uri.
#
# HTree.parse guesses whether input is HTML or not and XML or not.
#
# If it is guessed as HTML, the default namespace in the result is set to
# http://www.w3.org/1999/xhtml regardless of whether input has an XML
# namespace declaration, even if it is pre-XML HTML.
#
# If it is guessed as HTML and not XML, all element and attribute names are
# downcased.
#
# If the opened file or the read content has a charset method,
# HTree.parse decodes it according to $KCODE before parsing.
# Otherwise HTree.parse assumes the character encoding of the content is
# compatible with $KCODE.
# Note that the charset method is provided by URI::HTTP with open-uri.
def HTree.parse(input)
  HTree.with_frozen_string_hash do
    parse_as(input, false)
  end
end
# HTree.parse_xml parses input as XML and
# return a document tree represented by HTree::Doc.
#
# It behaves almost the same as HTree.parse but it assumes input is XML
# even if no XML declaration.
# The assumption causes following differences.
# * doesn't downcase element name.
# * The content of '
assert_equal([
[:stag, ''],
[:stag, ''],
[:stag, ' '],
], scan(s))
s = '\nd\n\ne"
assert_equal([
[:stag, ""],
[:text_pcdata, "a\n"],
[:stag, ""],
[:text_pcdata, "\nb\n"],
[:stag, ""],
[:text_pcdata, "\nd\n"],
[:etag, " "],
[:text_pcdata, "\ne"],
], scan(s))
end
# Compares the token list produced by scan(s) with the expected
# [kind, text] pairs for a fixture containing line breaks.
# NOTE(review): the fixture string contains no markup although xmldecl/stag/etag
# tokens are expected (several with empty text) — the tags look stripped from
# this copy; confirm against the upstream test.
def test_eol_xml
# In XML, line breaks are treated as part of content.
# It's because KEEPRSRE is yes in XML.
# http://www.satoshii.org/markup/websgml/valid-xml#keeprsre
s = "a\n\nb\n\nc\n \nd\n \ne"
assert_equal([
[:xmldecl, ""],
[:text_pcdata, "a\n"],
[:stag, ""],
[:text_pcdata, "\nb\n"],
[:stag, ""],
[:text_pcdata, "\nc\n"],
[:etag, " "],
[:text_pcdata, "\nd\n"],
[:etag, " "],
[:text_pcdata, "\ne"],
], scan(s))
end
# Checks the two-element boolean array returned by HTree.scan for three inputs.
# NOTE(review): judging by the test name the pair reports XML/HTML detection,
# but which element is which is not visible here; the input strings also look
# emptied of their markup — confirm against the upstream test.
def test_xml_html_detection
assert_equal([false, true], HTree.scan("") {})
assert_equal([true, false], HTree.scan(" ") {})
assert_equal([true, true], HTree.scan('') {})
end
# Expects scan to return a single :emptytag token for the input.
# NOTE(review): input and expected text are both a lone space here; the
# original tag text appears to be missing — confirm against the upstream test.
def test_quoted_attr
assert_equal([[:emptytag, ' ']], scan(' '))
end
# Expects scan to return a single :stag token for each of two inputs.
# NOTE(review): both inputs and expected texts are empty strings here; the
# original tag text appears to be missing — confirm against the upstream test.
def test_bare_slash
assert_equal([[:stag, '']], scan(''))
assert_equal([[:stag, '']], scan(''))
end
end
htree-0.8/test/test-rexml.rb 0000644 0001750 0001750 00000004167 11747021106 015115 0 ustar jonas jonas require 'test/unit'
require 'htree/parse'
require 'htree/rexml'
begin
require 'rexml/document'
rescue LoadError
end
# Tests for the to_rexml conversion of parsed trees. The whole class is
# defined only when the REXML constant is available (see the trailing
# "if defined? REXML").
# NOTE(review): most HTree.parse arguments below are blank or empty strings;
# the markup they presumably contained looks stripped from this copy — confirm
# the fixtures against the upstream file before relying on these tests.
class TestREXML < Test::Unit::TestCase
# A parsed document converts to a REXML::Document.
def test_doc
r = HTree.parse(' ').to_rexml
assert_instance_of(REXML::Document, r)
end
# The root element converts to a REXML::Element named 'root' with attribute a="b".
def test_elem
r = HTree.parse(' ').to_rexml
assert_instance_of(REXML::Element, e = r.root)
assert_equal('root', e.name)
assert_equal('b', e.attribute('a').to_s)
end
# The first child of the root converts to a REXML::Text reading 'aaa'.
def test_text
r = HTree.parse('aaa ').to_rexml
assert_instance_of(REXML::Text, t = r.root.children[0])
assert_equal('aaa', t.to_s)
end
# The first document child converts to a REXML::XMLDecl (version '1.0', no
# standalone); the declaration node also converts on its own.
def test_xmldecl
s = ''
r = HTree.parse(s + 'aaa ').to_rexml
assert_instance_of(REXML::XMLDecl, x = r.children[0])
assert_equal('1.0', x.version)
assert_equal(nil, x.standalone)
assert_instance_of(REXML::XMLDecl, HTree.parse(s).children[0].to_rexml)
end
# The first document child converts to a REXML::DocType carrying the name and
# external id; the doctype node also converts on its own.
def test_doctype
s = ''
r = HTree.parse(s + 'xxx ').to_rexml
assert_instance_of(REXML::DocType, d = r.children[0])
assert_equal('html', d.name)
assert_equal('PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"', d.external_id)
assert_instance_of(REXML::DocType, HTree.parse(s).children[0].to_rexml)
end
# A processing instruction converts to a REXML::Instruction with target
# 'xxx' and content 'yyy'.
def test_procins
r = HTree.parse(' ').to_rexml
assert_instance_of(REXML::Instruction, i = r.root.children[0])
assert_equal('xxx', i.target)
assert_equal('yyy', i.content)
assert_instance_of(REXML::Instruction, HTree.parse('').children[0].to_rexml)
end
# A comment converts to a REXML::Comment whose text is ' zzz '.
def test_comment
r = HTree.parse(' ').to_rexml
assert_instance_of(REXML::Comment, c = r.root.children[0])
assert_equal(' zzz ', c.to_s)
end
# to_rexml on the first child of this input is expected to be nil.
def test_bogusetag
assert_equal(nil, HTree.parse('').children[0].to_rexml)
end
# Compares a slice of the serialized REXML output with the expected text.
# NOTE(review): the expected string and the slicing regexp are both empty here.
def test_style
assert_equal('', HTree.parse('').to_rexml.to_s[//])
end
end if defined? REXML
htree-0.8/test/test-context.rb 0000644 0001750 0001750 00000002020 11747021106 015434 0 ustar jonas jonas require 'test/unit'
require 'htree/context'
# Tests for HTree::Context: validation of the namespace table, prefix lookup
# and derivation of a new context with subst_namespaces.
class TestContext < Test::Unit::TestCase
  # Invalid prefix/URI entries are rejected; a nil prefix (default namespace)
  # is accepted.
  def test_namespaces_validation
    [{1=>'u'}, {''=>'u'}, {'p'=>nil}].each do |namespaces|
      assert_raise(ArgumentError) { HTree::Context.new(namespaces) }
    end
    assert_nothing_raised { HTree::Context.new({nil=>'u'}) }
  end

  # namespace_uri knows the predefined 'xml' prefix, returns declared URIs
  # and yields nil for an undeclared prefix.
  def test_namespace_uri
    xml_uri = HTree::Context.new.namespace_uri('xml')
    assert_equal('http://www.w3.org/XML/1998/namespace', xml_uri)

    default_declared = HTree::Context.new({nil=>'u'})
    assert_equal('u', default_declared.namespace_uri(nil))

    prefix_declared = HTree::Context.new({'p'=>'u'})
    assert_equal('u', prefix_declared.namespace_uri('p'))
    assert_equal(nil, HTree::Context.new({'p'=>'u'}).namespace_uri('q'))
  end

  # subst_namespaces returns a context with the extra declaration and leaves
  # the receiver unchanged.
  def test_subst_namespaces
    base = HTree::Context.new({'p'=>'u'})
    derived = base.subst_namespaces({'q'=>'v'})
    assert_equal('u', base.namespace_uri('p'))
    assert_equal(nil, base.namespace_uri('q'))
    assert_equal('u', derived.namespace_uri('p'))
    assert_equal('v', derived.namespace_uri('q'))
  end
end
htree-0.8/test/test-raw_string.rb 0000644 0001750 0001750 00000000723 11747021106 016137 0 ustar jonas jonas require 'test/unit'
require 'htree'
# Tests for raw_string on parsed and on freshly constructed elements.
# NOTE(review): the parsed fixtures read "x " here; any markup they contained
# looks stripped from this copy — confirm against the upstream file.
class TestRawString < Test::Unit::TestCase
# raw_string of the parsed root equals the input, and asking twice gives the
# same answer.
def test_elem
t = HTree.parse("x ")
assert_equal("x ", t.root.raw_string)
assert_equal("x ", t.root.raw_string) # raw_string shouldn't have side effect.
end
# Elements built with HTree::Elem.new have no raw_string, even when a parsed
# element is passed in as a child.
def test_no_raw_string
t = HTree::Elem.new('a')
assert_equal(nil, t.raw_string)
t = HTree::Elem.new('a', HTree.parse("x ").root)
assert_equal(nil, t.raw_string)
end
end
htree-0.8/test/test-elem-new.rb 0000644 0001750 0001750 00000006363 11747021106 015477 0 ustar jonas jonas require 'test/unit'
require 'htree/doc'
require 'htree/elem'
require 'htree/equality'
require 'htree/traverse'
# Tests for the argument handling of HTree::Elem.new: children, attribute
# hashes, contexts and the empty-element flag.
class TestElemNew < Test::Unit::TestCase
  # No arguments beyond the name: an empty element without attributes.
  def test_empty
    elem = HTree::Elem.new('a')
    assert_equal('a', elem.qualified_name)
    assert_equal({}, elem.attributes)
    assert_equal(HTree::DefaultContext, stag_context(elem))
    assert_equal([], elem.children)
    assert_equal(true, elem.empty_element?)
    assert_nil(elem.instance_variable_get(:@etag))
  end

  # An explicit empty child array makes a non-empty element with no children.
  def test_empty_array
    elem = HTree::Elem.new('a', [])
    assert_equal('a', elem.qualified_name)
    assert_equal({}, elem.attributes)
    assert_equal(HTree::DefaultContext, stag_context(elem))
    assert_equal([], elem.children)
    assert_equal(false, elem.empty_element?)
    assert_equal(nil, elem.instance_variable_get(:@etag))
  end

  # A hash argument supplies attributes and keeps the element empty.
  def test_empty_attr
    elem = HTree::Elem.new('a', {'href'=>'xxx'})
    href = HTree::Name.parse_attribute_name('href', HTree::DefaultContext)
    assert_equal('a', elem.qualified_name)
    assert_equal({href=>HTree::Text.new('xxx')}, elem.attributes)
    assert_equal(HTree::DefaultContext, stag_context(elem))
    assert_equal([], elem.children)
    assert_equal(true, elem.empty_element?)
    assert_equal(nil, elem.instance_variable_get(:@etag))
  end

  # A node argument becomes a child.
  def test_node
    text = HTree::Text.new('t')
    elem = HTree::Elem.new('a', text)
    assert_equal({}, elem.attributes)
    assert_equal([text], elem.children)
  end

  # A node used as a hash value becomes an attribute value, not a child.
  def test_hash
    text = HTree::Text.new('t')
    elem = HTree::Elem.new('a', {'b' => text})
    pairs = elem.attributes.map {|name, value| [name.universal_name, value] }
    assert_equal([['b', text]], pairs)
    assert_equal([], elem.children)
  end

  # A string argument becomes a Text child.
  def test_string
    expected = HTree::Text.new('s')
    elem = HTree::Elem.new('a', "s")
    assert_equal({}, elem.attributes)
    assert_equal([expected], elem.children)
  end

  # Children and attribute hashes may be interleaved freely.
  def test_interleave
    text = HTree::Text.new('t')
    elem = HTree::Elem.new('a', text, {'b' => text}, text, {'c' => 'd'}, text)
    pairs = elem.attributes.map {|name, value| [name.universal_name, value] }.sort
    assert_equal([['b', text], ['c', HTree::Text.new('d')]], pairs)
    assert_equal([text, text, text], elem.children)
  end

  # An array argument supplies several children of different node kinds.
  def test_nest
    text = HTree::Text.new('t')
    bogus = HTree::BogusETag.new('a')
    with_xmldecl = HTree::Elem.new('e', HTree::XMLDecl.new('1.0'))
    with_doctype = HTree::Elem.new('e', HTree::DocType.new('html'))
    kids = [text, text, text, bogus, with_xmldecl, with_doctype]
    elem = HTree::Elem.new('a', kids)
    assert_equal({}, elem.attributes)
    assert_equal([text, text, text, bogus, with_xmldecl, with_doctype], elem.children)
  end

  # Tags are not acceptable as children.
  def test_err
    [HTree::STag, HTree::ETag].each do |tag_class|
      assert_raises(TypeError) { HTree::Elem.new('e', tag_class.new('a')) }
    end
  end

  # A context argument resolves prefixes in the element and attribute names;
  # passing two contexts is an error.
  def test_context
    context = HTree::DefaultContext.subst_namespaces({'p'=>'u'})
    elem = HTree::Elem.new('p:n', {'p:a'=>'t'}, context)
    assert_equal('{u}n', elem.name)
    assert_equal('t', elem.get_attr('{u}a'))
    assert_same(context, stag_context(elem))
    assert_raises(ArgumentError) { HTree::Elem.new('e', context, context) }
  end

  # Hashes nested inside a child array are rejected; once the array is
  # emptied it is accepted and yields a non-empty element.
  def test_hash_in_array
    attrs = [{'a'=>'1'}, {'a'=>'2'}]
    2.times do
      assert_raises(TypeError) { HTree::Elem.new('e', attrs) }
      attrs.pop
    end
    assert_equal([], attrs)
    assert_equal(false, HTree::Elem.new('e', attrs).empty_element?)
  end

  private

  # Context recorded on the element's start tag.
  def stag_context(elem)
    elem.instance_variable_get(:@stag).inherited_context
  end
end
htree-0.8/test/test-parse.rb 0000644 0001750 0001750 00000007622 11747021106 015077 0 ustar jonas jonas require 'test/unit'
require 'htree/parse'
require 'htree/equality'
require 'htree/traverse'
# Tests for HTree.parse and HTree.parse_xml: namespaces, doctype accessors,
# processing instructions, name case handling and empty-element detection.
# NOTE(review): many parse inputs below are empty or blank strings; the markup
# they presumably held looks stripped from this copy — confirm the fixtures
# against the upstream file before relying on these tests.
class TestParse < Test::Unit::TestCase
# Parsing an empty string as XML gives an empty document.
def test_empty
assert_equal(HTree::Doc.new([]), HTree.parse_xml("").eliminate_raw_string)
end
# Builds the expected tree by hand: x1 declares xmlns='bb' and its child x2
# carries 'bb' as the default namespace in its context.
def test_xmlns_default
t1 = HTree::Doc.new([
HTree::Elem.new!(
HTree::STag.new('x1', [['xmlns', 'bb']],
HTree::DefaultContext.subst_namespaces({'xml'=>'http://www.w3.org/XML/1998/namespace'})),
[HTree::Elem.new!(HTree::STag.new('x2', [],
HTree::DefaultContext.subst_namespaces({nil => 'bb', 'xml'=>'http://www.w3.org/XML/1998/namespace'})), nil)])
])
t2 = HTree.parse_xml('')
assert_equal(t1, t2)
end
# root_element_name of the doctype node, for two inputs.
def test_doctype_root_element_name
assert_equal('html',
HTree.parse('').children[0].root_element_name)
# xxx: should be downcased?
assert_equal('HTML',
HTree.parse('').children[1].root_element_name)
end
# system_identifier of the doctype node, for two inputs.
def test_doctype_system_identifier
assert_equal('http://www.w3.org/TR/html4/loose.dtd',
HTree.parse("").children[0].system_identifier)
assert_equal('http://www.w3.org/TR/html4/loose.dtd',
HTree.parse("").children[0].system_identifier)
end
# A processing instruction with target 'x' and no content.
def test_procins
t = HTree.parse_xml("").children[0]
assert_equal('x', t.target)
assert_equal(nil, t.content)
end
# Line breaks inside the element are kept as content, and raw_string of the
# parsed root equals the input.
def test_eol_html
t1 = HTree::Elem.new('a', "\nb\n")
s = "\nb\n "
t2 = HTree.parse_xml(s).root
assert_equal(t1, t2)
assert_equal(s, t2.raw_string)
end
# The root of this parse result is an html element in the XHTML namespace.
def test_parse_html
t1 = HTree.parse("a")
assert_equal("{http://www.w3.org/1999/xhtml}html", t1.root.element_name.universal_name)
end
# The parsed root equals an 'a' element whose href is 'http://host/'.
def test_bare_url
t1 = HTree::Elem.new('a', {'href'=>'http://host/'})
s = ""
t2 = HTree.parse(s).root
assert_equal(t1, t2)
end
# The parsed root equals an 'n' element with attribute a='v/' and text 'x'.
def test_bare_slash
t1 = HTree::Elem.new('n', {'a'=>'v/'}, 'x')
s = "x"
t2 = HTree.parse(s).root
assert_equal(t1, t2)
end
# The parsed root equals an empty 'n' element with attribute a='v/'.
def test_bare_slash_empty
t1 = HTree::Elem.new('n', {'a'=>'v/'})
s = ""
t2 = HTree.parse(s).root
assert_equal(t1, t2)
end
# The RDF element name keeps its upper case.
def test_downcase
assert_equal("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF",
HTree.parse(' ').root.name)
end
# Local-name case for the three HTML/XML combinations named in the inline
# comments.
def test_downcase_name
# HTML && !XML
assert_equal('html', HTree.parse('').root.element_name.local_name)
assert_equal('html', HTree.parse('').root.element_name.local_name)
# HTML && XML
assert_equal('html', HTree.parse('').root.element_name.local_name)
assert_equal('v', HTree.parse('').root.get_attr('{u}Y'))
# !HTML && XML
assert_equal('RDF', HTree.parse(' ').children[1].element_name.local_name)
end
# The parse result equals a document holding one XHTML script element with
# an empty child list.
def test_script_etag
assert_equal(HTree::Doc.new(HTree::Elem.new('{http://www.w3.org/1999/xhtml}script', [])),
HTree.parse(''))
end
# The parsed html element compares equal to an empty Elem but is not flagged
# as an empty element.
def test_html_emptyelem
t = HTree.parse('')
assert_equal(HTree::Doc.new(HTree::Elem.new('{http://www.w3.org/1999/xhtml}html')), t)
assert(!t.children[0].empty_element?)
end
# The hr child of the parsed html element is flagged as an empty element.
def test_hr_emptyelem
t = HTree.parse(' ')
assert_equal(
HTree::Doc.new(
HTree::Elem.new('{http://www.w3.org/1999/xhtml}html',
HTree::Elem.new('{http://www.w3.org/1999/xhtml}hr'))), t)
assert(t.children[0].children[0].empty_element?)
end
end
htree-0.8/test/template.html 0000644 0001750 0001750 00000000117 11747021106 015154 0 ustar jonas jonas
dummy_title
htree-0.8/test/test-extract_text.rb 0000644 0001750 0001750 00000000523 11747021106 016474 0 ustar jonas jonas require 'test/unit'
require 'htree/extract_text'
require 'htree/equality'
# Tests for extract_text on a Text node and on an element wrapping one.
class TestExtractText < Test::Unit::TestCase
  # A Text node extracts to something equal to itself.
  def test_single
    text = HTree::Text.new('abc')
    assert_equal(text, text.extract_text)
  end

  # An element with a single Text child extracts to that text.
  def test_elem
    text = HTree::Text.new('abc')
    elem = HTree::Elem.new('e', text)
    assert_equal(text, elem.extract_text)
  end
end
htree-0.8/test/test-output.rb 0000644 0001750 0001750 00000010225 11747021106 015316 0 ustar jonas jonas require 'test/unit'
require 'htree'
# Tests for the output methods of texts, names, tags and the other node kinds.
# NOTE(review): many expected strings below are blank, or identical to the
# unescaped input; markup and character references look stripped or decoded in
# this copy — confirm the expectations against the upstream file.
class TestOutput < Test::Unit::TestCase
# Calls +meth+ on +t+ with +rest+ followed by a US-ASCII encoder and the
# default context, and returns what the encoder's finish returns.
def gen(t, meth=:output, *rest)
encoder = HTree::Encoder.new('US-ASCII', 'US-ASCII')
t.__send__(meth, *(rest + [encoder, HTree::DefaultContext]))
encoder.finish
end
# Output of Text nodes.
def test_text
assert_equal('a&<>"b', gen(HTree::Text.new('a&<>"b')))
assert_equal("abc&def", gen(HTree::Text.new("abc&def")))
assert_equal('"\'&', gen(HTree::Text.new('"\'&')))
assert_equal('"\'<&>', gen(HTree::Text.new('"\'<&>')))
end
# Output of Text nodes as double-quoted attribute values.
def test_text_attvalue
assert_equal('"a&<>"b"', gen(HTree::Text.new('a&<>"b'), :output_attvalue))
assert_equal('"abc"', gen(HTree::Text.new("abc"), :output_attvalue))
assert_equal('"""', gen(HTree::Text.new('"'), :output_attvalue))
end
# Output of names: plain, default-namespaced, prefixed and xmlns forms.
def test_name
assert_equal('abc', gen(HTree::Name.parse_element_name('abc', HTree::DefaultContext)))
assert_equal('n', gen(HTree::Name.new(nil, 'u', 'n')))
assert_equal('p:n', gen(HTree::Name.new('p', 'u', 'n')))
assert_equal('n', gen(HTree::Name.new(nil, '', 'n')))
assert_equal('xmlns', gen(HTree::Name.new('xmlns', nil, nil)))
assert_equal('xmlns:n', gen(HTree::Name.new('xmlns', nil, 'n')))
end
# Output of a name together with its attribute value.
def test_name_attribute
assert_equal('abc="a&<>"b"',
gen(HTree::Name.parse_element_name('abc', HTree::DefaultContext),
:output_attribute,
HTree::Text.new('a&<>"b')))
end
# Output of a document holding two elements.
def test_doc
t = HTree::Doc.new(HTree::Elem.new('a'), HTree::Elem.new('b'))
assert_equal(" ", gen(t))
end
# Output of elements built with Elem.new and Elem.new!, with and without
# children.
def test_elem
t = HTree::Elem.new('a', [])
assert_equal(" ", gen(t))
assert_equal(" ",
gen(HTree::Elem.new!(HTree::STag.new('b'))))
assert_equal(" ",
gen(HTree::Elem.new!(HTree::STag.new('b'), [])))
assert_equal(" ",
gen(HTree::Elem.new!(HTree::STag.new('a'), [
HTree::Elem.new!(HTree::STag.new('b')),
HTree::Elem.new!(HTree::STag.new('c')),
HTree::Elem.new!(HTree::STag.new('d'))
])))
end
# Output of an element created without children.
def test_elem_empty
t = HTree::Elem.new('a')
assert_equal(" ", gen(t))
end
# Output of an STag through output_stag, output_emptytag and output_etag,
# with and without attributes.
def test_stag
assert_equal("",
gen(HTree::STag.new("name"), :output_stag))
assert_equal(" ",
gen(HTree::STag.new("name"), :output_emptytag))
assert_equal(" ",
gen(HTree::STag.new("name"), :output_etag))
assert_equal("",
gen(HTree::STag.new("name", [["a", "b"]]), :output_emptytag))
assert_equal("",
gen(HTree::STag.new("name", [['a', '<"\'>']]), :output_emptytag))
assert_equal("",
gen(HTree::STag.new("ppp:nnn", [["xmlns", "uuu\"b"]]), :output_emptytag))
end
# Output of an XML declaration through output and output_prolog_xmldecl.
def test_xmldecl
t = HTree::XMLDecl.new('1.0', 'US-ASCII')
assert_equal('', gen(t))
assert_equal('',
gen(t, :output_prolog_xmldecl))
end
# Output of a document type declaration.
def test_doctype
t = HTree::DocType.new('html',
'-//W3C//DTD HTML 4.01//EN',
'http://www.w3.org/TR/html4/strict.dtd')
assert_equal('', gen(t))
end
# Output of processing instructions with and without content.
def test_procins
t = HTree::ProcIns.new('xml-stylesheet', 'type="text/xml" href="#style1"')
assert_equal('', gen(t))
t = HTree::ProcIns.new('x', nil)
assert_equal('', gen(t))
end
# Output of a comment.
def test_comment
t = HTree::Comment.new('xxx')
assert_equal('', gen(t))
end
end
class TestHTMLOutput < Test::Unit::TestCase
# Expects display_html("") on the tree built from the input to give "aaa".
# NOTE(review): input and expected output are both the bare text "aaa" here;
# going by the test name the original fixture probably contained markup that
# was stripped from this copy — confirm against the upstream test.
def test_top_xmlns
assert_equal("aaa", HTree("aaa").display_html(""))
end
def test_script
assert_equal("",
HTree("",
HTree.expand_template('') {""}.gsub(/\n/, ''))
end
def test_xml_script
v = "x",
HTree.expand_template('') {""}.gsub(/\n/, ''))
end
def test_html_script_invalid_content
v = "x "}
}
end
def test_stylexxx
v = "xx<y ",
HTree.expand_template('') {"ab "}.gsub(/\n/, ''))
end
end
# Tests that HTree.expand_template reports the output charset on the
# destination object.
class TestCharset2 < Test::Unit::TestCase
  # String with a writable +charset+ attribute, used as the output target so
  # the tests can read back the charset afterwards (presumably assigned by
  # HTree.expand_template — confirm in the template code).
  class CharsetString < String
    attr_accessor :charset
  end

  # Runs the block under the given KCODE letter.
  #
  # When strings respond to force_encoding, the block runs only if
  # HTree::Encoder.internal_charset starts with the upcased letter;
  # otherwise the test body is skipped silently. On older Rubies $KCODE is
  # switched for the duration of the block and restored afterwards.
  def with_kcode(kcode)
    if "".respond_to? :force_encoding
      if HTree::Encoder.internal_charset.start_with?(kcode.upcase)
        yield
      end
    else
      old = $KCODE
      begin
        $KCODE = kcode
        yield
      ensure
        $KCODE = old
      end
    end
  end

  # Pure ASCII output is reported as US-ASCII.
  def test_us_ascii
    with_kcode('E') {
      out = HTree.expand_template(CharsetString.new) { "abc" }
      # assert_equal takes (expected, actual); keeping that order makes the
      # failure message name the right side as "expected".
      assert_equal('US-ASCII', out.charset)
    }
  end

  # A non-ASCII EUC-JP template is reported as EUC-JP.
  def test_euc_jp
    with_kcode('E') {
      out = HTree.expand_template(CharsetString.new) {
        str = "\xa1\xa1"
        str.force_encoding("EUC-JP") if str.respond_to? :force_encoding
        str
      }
      assert_equal('EUC-JP', out.charset)
    }
  end

  # A non-ASCII UTF-8 template is reported as UTF-8.
  def test_utf_8
    with_kcode('U') {
      out = HTree.expand_template(CharsetString.new) {
        str = "\xc2\xa1"
        str.force_encoding("UTF-8") if str.respond_to? :force_encoding
        str
      }
      assert_equal('UTF-8', out.charset)
    }
  end
end
# Test for template expansion of a document with a DOCTYPE.
# NOTE(review): the expected string, the output argument and the template are
# all empty strings here; the markup they held looks stripped from this copy —
# confirm against the upstream file.
class TestTemplateDOCTYPE < Test::Unit::TestCase
# Compares the expansion result, with newlines removed, to the expected text.
def test_html
assert_equal(
'',
HTree.expand_template('') {''}.gsub(/\n/, ''))
end
end
htree-0.8/README 0000644 0001750 0001750 00000001710 11747021106 012354 0 ustar jonas jonas = htree - HTML/XML tree library
htree provides a tree data structure which represents HTML and XML data.
== Feature
* Permissive unified HTML/XML parser
* byte-to-byte round-tripping unparser
* XML namespace support
* Dedicated class for escaped strings. This eases sanitization.
* HTML/XHTML/XML generator
* template engine
* recursive template expansion
* converter to REXML document
== Home Page
http://www.a-k-r.org/htree/
== Requirements
* ruby : http://www.ruby-lang.org/
== Download
* latest release: http://www.a-k-r.org/htree/htree-0.8.tar.gz
* development version: https://github.com/akr/htree
== Install
% ruby install.rb
== Reference Manual
See rdoc/index.html or
http://www.a-k-r.org/htree/rdoc/
== Usage
The following two-line script converts HTML to XHTML.
require 'htree'
HTree(STDIN).display_xml
The conversion method to REXML is provided as to_rexml.
HTree(...).to_rexml
== License
Ruby's
== Author
Tanaka Akira
htree-0.8/Makefile 0000644 0001750 0001750 00000000524 11747021106 013136 0 ustar jonas jonas RUBY=ruby
# Default target: regenerate the README and the RDoc HTML.
all: README rdoc/index.html

# README is generated from its ERB template.
README: misc/README.erb
	erb misc/README.erb > README

# Run the test suite ("check" and "test" are synonyms).
check test:
	$(RUBY) -I. test-all.rb

install:
	$(RUBY) install.rb

.PHONY: check test all install

# Sources handed to rdoc: htree.rb, htree/modules.rb and the htree/*.rb files
# matched by the two globs below (names starting with a-l or n-z).
RB = htree.rb htree/modules.rb $(wildcard htree/[a-l]*.rb) $(wildcard htree/[n-z]*.rb)

# NOTE(review): this removes "doc" although rdoc writes to "rdoc" (--op rdoc);
# kept as found — confirm whether "doc" is just a leftover output location.
rdoc/index.html: $(RB)
	rm -rf doc
	rdoc --op rdoc $(RB)
htree-0.8/install.rb 0000644 0001750 0001750 00000004657 11747021106 013504 0 ustar jonas jonas #!/usr/bin/env ruby
# usage: ruby install.rb [-n] [--destdir=DESTDIR]
# options:
# -n : don't install
# --destdir=DESTDIR
#
# Author: Tanaka Akira
require 'optparse'
require 'fileutils'
# Returns the first load-path entry ($:) that ends in "/site_ruby/<version>".
# Raises a RuntimeError when no entry matches.
def target_directory
  site_dir = $:.find { |loc| %r{/site_ruby/[\d.]+\z} =~ loc }
  return site_dir if site_dir

  raise "could not find target install directory"
end
# Memo of cvs_files results, keyed by directory.
CVS_FILES = {}

# Returns the names listed in "<dir>/CVS/Entries" as a hash of name => true,
# or nil when +dir+ has no CVS subdirectory. Both file lines ("/name/...")
# and directory lines ("D/name/...") are collected. The answer, including
# nil, is remembered per directory.
def cvs_files(dir)
  return CVS_FILES[dir] if CVS_FILES.include?(dir)

  entries = nil
  if File.directory?("#{dir}/CVS")
    entries = {}
    File.foreach("#{dir}/CVS/Entries") do |line|
      # An optional leading "D" marks a directory entry.
      matched = %r{\AD?/([^/]+)/}.match(line)
      entries[matched[1]] = true if matched
    end
  end
  CVS_FILES[dir] = entries
end
# Yields every file to install: each "*.rb" in the current directory, except
# names starting with "test-" or "install" and, when the directory is under
# CVS, names that are not listed there, followed by the files each one
# requires (see each_require).
#
# Every file is yielded once: a top-level file that was already reached
# through another file's require is skipped when the glob gets to it.
def each_target(&block)
  target_set = {}
  cvs = cvs_files('.')
  Dir.glob("*.rb") {|filename|
    next if /\Atest-/ =~ filename
    next if /\Ainstall/ =~ filename
    next if cvs && !cvs.include?(filename)
    next if target_set.include?(filename) # already yielded via a require
    target_set[filename] = true
    yield filename
    each_require(filename, target_set, &block)
  }
end
# Scans +file+ for lines of the form  require 'feature'  and, for every
# "feature.rb" that exists relative to the current directory and is not yet
# in +target_set+, records it, yields it and scans it in turn.
def each_require(file, target_set, &block)
  File.foreach(file) do |line|
    matched = /\A\s*require\s+['"]([^'"]+)['"]/.match(line)
    next unless matched

    filename = "#{matched[1]}.rb"
    next if target_set.include?(filename) || !File.exist?(filename)

    target_set[filename] = true
    yield filename
    each_require(filename, target_set, &block)
  end
end
# Returns the file names yielded by each_target as a sorted array.
def collect_target
  found = []
  each_target { |name| found.push(name) }
  found.sort
end
# Copies +src+ to +dst+ unless the destination already has the same content.
# Any existing destination is removed first, the destination directory is
# created with mode 0755 and the installed file is set to mode 0644.
def install_file(src, dst)
  # A missing destination just means there is nothing to compare against.
  ignore_exc(Errno::ENOENT) do
    return if FileUtils.compare_file(src, dst)
  end
  # check shadow
  ignore_exc(Errno::ENOENT) { File.unlink(dst) }
  dst_dir = File.dirname(dst)
  FileUtils.mkdir_p(dst_dir, :mode => 0755)
  FileUtils.cp(src, dst, :verbose => true)
  File.chmod(0644, dst)
end
# Runs the block and returns its value; an exception matching +exc+ is
# swallowed and nil is returned instead. Other exceptions propagate.
def ignore_exc(exc)
  yield
rescue exc
  nil
end
# Command-line handling and the install run itself.
$opt_n = false
$opt_destdir = ""

ARGV.options do |q|
  q.banner = 'ruby install.rb [opts]'
  q.def_option('--help', 'show this message') { puts q; exit(0) }
  q.def_option('-n', "don't install") { $opt_n = true }
  q.def_option('--destdir=DESTDIR', "specify DESTDIR") { |destdir| $opt_destdir = destdir }
  q.parse!
end

if $opt_n
  # Dry run: only list where each file would go.
  dir = target_directory
  collect_target.each do |filename|
    puts "-> #{$opt_destdir}#{dir}/#{filename}"
  end
  exit
end

# Real run: copy every target below DESTDIR + the site_ruby directory.
File.umask 022
dir = target_directory
collect_target.each do |filename|
  install_file filename, "#{$opt_destdir}#{dir}/#{filename}"
end
htree-0.8/htree.rb 0000644 0001750 0001750 00000005212 11747021106 013131 0 ustar jonas jonas #
# = htree.rb
#
# HTML/XML document tree
#
# Author:: Tanaka Akira
#
# == Features
#
# - Permissive unified HTML/XML parser
# - byte-to-byte round-tripping unparser
# - XML namespace support
# - Dedicated class for escaped strings. This eases sanitization.
# - XHTML/XML generator
# - template engine: link:files/htree/template_rb.html
# - recursive template expansion
# - REXML tree generator: link:files/htree/rexml_rb.html
#
# == Example
#
# The following one-liner prints the parsed tree object.
#
# % ruby -rhtree -e 'pp HTree(ARGF)' html-file
#
# The following two-line script converts HTML to XHTML.
#
# require 'htree'
# HTree(STDIN).display_xml
#
# The conversion method to REXML is provided as to_rexml.
#
# HTree(...).to_rexml
#
# == Module/Class Hierarchy
#
# * HTree
# * HTree::Name
# * HTree::Context
# * HTree::Location
# * HTree::Node
# * HTree::Doc
# * HTree::Elem
# * HTree::Text
# * HTree::XMLDecl
# * HTree::DocType
# * HTree::ProcIns
# * HTree::Comment
# * HTree::BogusETag
# * HTree::Error
#
# == Method Summary
#
# HTree provides the following methods.
#
# - Parsing Methods
# - HTree(html_string ) -> HTree::Doc
# - HTree.parse(input ) -> HTree::Doc
#
# - Generation Methods
# - HTree::Node#display_xml -> STDOUT
# - HTree::Node#display_xml(out ) -> out
# - HTree::Node#display_xml(out , encoding ) -> out
# - HTree::Text#to_s -> String
#
# - Template Methods
# - HTree.expand_template{template_string } -> STDOUT
# - HTree.expand_template(out ){template_string } -> out
# - HTree.expand_template(out , encoding ){template_string } -> out
# - HTree.compile_template(template_string ) -> Module
# - HTree{template_string } -> HTree::Doc
#
# - Traverse Methods
# - HTree::Elem#attributes -> Hash[HTree::Name -> HTree::Text]
# - HTree::Elem::Location#attributes -> Hash[HTree::Name -> HTree::Location]
#
# - Predicate Methods
# - HTree::Traverse#doc? -> true or false
# - HTree::Traverse#elem? -> true or false
# - HTree::Traverse#text? -> true or false
# - HTree::Traverse#xmldecl? -> true or false
# - HTree::Traverse#doctype? -> true or false
# - HTree::Traverse#procins? -> true or false
# - HTree::Traverse#comment? -> true or false
# - HTree::Traverse#bogusetag? -> true or false
#
# - REXML Tree Generator
# - HTree::Node#to_rexml -> REXML::Child
require 'htree/parse'
require 'htree/extract_text'
require 'htree/equality'
require 'htree/inspect'
require 'htree/display'
require 'htree/loc'
require 'htree/traverse'
require 'htree/template'
require 'htree/rexml'
htree-0.8/.cvsignore 0000644 0001750 0001750 00000000033 11747021106 013471 0 ustar jonas jonas index.html
README.html
doc