nokogiri-1.6.1/ 0000755 0001750 0001750 00000000000 12261213762 012720 5 ustar boutil boutil nokogiri-1.6.1/test_all 0000755 0001750 0001750 00000004111 12261213762 014452 0 ustar boutil boutil #! /usr/bin/env bash
#
# script to run tests on all relevant rubies, and valgrind on supported rubies.
# outputs tests to `test.log` and valgrind output to `valgrind.log`.
#
# requires `rvm` to be installed. sorry about that, multiruby dudes.
#
# it's worth periodically using hoe-debugger's ability to generate
# valgrind suppression files to remove spurious valgrind messages
# (e.g., 1.9.3's glob_helper). ["rake test:valgrind:suppression"]
#
# Space-separated list of RVM ruby identifiers; every loop below iterates over it.
RUBIES="ruby-1.9.3-p327 jruby-1.7.3 jruby-1.6.5.1 jruby-1.6.7.2 ruby-1.9.2-p320"
# Log file that the `rake test` output is appended to (via `tee -a`).
TEST_LOG=test.log
# Log file that the `rake test:valgrind` output is appended to (via `tee -a`).
VALGRIND_LOG=valgrind.log
# Load RVM into a shell session *as a function*.
# The per-user install is tried first, then the system-wide one. Every later
# step of this script goes through `rvm`, so stop right away when neither
# script is present.
if [[ -s "$HOME/.rvm/scripts/rvm" ]] ; then
  source "$HOME/.rvm/scripts/rvm"
elif [[ -s "/usr/local/rvm/scripts/rvm" ]] ; then
  source "/usr/local/rvm/scripts/rvm"
else
  # Plain `echo` does not interpret "\n" (it would be printed literally), so
  # the message carries no escape sequence. Errors belong on stderr.
  echo "ERROR: An RVM installation was not found." >&2
  exit 1
fi
# Create both log files, or truncate them if they already exist, so each run
# starts with empty logs.
> $TEST_LOG
> $VALGRIND_LOG
# From here on the script exits when a command fails (subject to bash's usual
# errexit exceptions: conditions, non-final members of `||`/`&&` lists, and
# non-final commands of a pipeline).
set -o errexit
# Switch the shell to the "nokogiri" gemset of the given ruby.
#   $1 - RVM ruby identifier, e.g. ruby-1.9.3-p327
# The identifier is remembered in the global `current_ruby`. `--create` is
# passed so that rvm sets the gemset up when it is missing. When `rvm use`
# fails, `rvm -v` is run instead and its status becomes the function's status.
rvm_use() {
  current_ruby=$1
  if ! rvm use "${1}@nokogiri" --create ; then
    rvm -v
  fi
}
# Run `rake generate` under ruby-1.9.3-p327, then switch back to whichever
# ruby `current_ruby` named when the function was entered.
# rake's stdout is discarded; its stderr is pointed at the caller's stdout
# (the `2>&1` is applied before stdout is redirected).
function generate_parser_and_tokenizer {
  # `local` keeps the scratch variable out of the script's global namespace.
  local previous_ruby=$current_ruby
  rvm_use ruby-1.9.3-p327
  bundle exec rake generate 2>&1 > /dev/null
  # Quoted so the saved identifier is passed as exactly one argument.
  rvm_use "$previous_ruby"
}
# Run rake's `clean` and `clobber` tasks through bundler.
# rake's stderr is first pointed at the script's stdout, then rake's stdout
# is discarded, so only error output remains visible.
clean() {
  bundle exec rake clean clobber 2>&1 1> /dev/null
}
# Announce and run `rake compile` through bundler.
# rake's stdout is discarded; its stderr is pointed at the script's stdout.
compile() {
  printf '%s\n' "** compiling ..."
  # parser/tokenizer regeneration is currently switched off:
  # generate_parser_and_tokenizer
  bundle exec rake compile 2>&1 1> /dev/null
}
# Pass 1: for every ruby, make sure bundler and the bundle are installed and
# leave a clean tree behind.
for ruby in $RUBIES ; do
  rvm_use $ruby
  # install bundler only when `which` cannot find a `bundle` executable
  which bundle || gem install bundler
  # try the quiet, local-only install first; fall back to a regular install
  if ! bundle install --quiet --local ; then
    bundle install
  fi
  clean
done
# Pass 2: build and run the test suite on every ruby, appending a banner and
# the test output to the test log.
for ruby in $RUBIES ; do
  rvm_use $ruby
  echo -e "**\n** testing nokogiri on ${ruby}\n**" | tee -a "$TEST_LOG"
  clean
  compile
  printf '%s\n' "** running tests ..."
  # The pipeline's status is tee's, so a failing test run does not trip
  # errexit and the remaining rubies are still exercised.
  bundle exec rake test 2>&1 | tee -a "$TEST_LOG"
  clean
done
# Pass 3: run the valgrind test task on every ruby whose identifier does not
# contain "jruby", appending a banner and the output to the valgrind log.
for ruby in $RUBIES ; do
  # skip the JRuby entries
  if [[ $ruby == *jruby* ]] ; then
    continue
  fi
  rvm_use $ruby
  echo -e "**\n** nokogiri prerelease: ${ruby}\n**" | tee -a "$VALGRIND_LOG"
  clean
  compile
  printf '%s\n' "** running valgrind on tests ..."
  # As the pipeline's status is tee's, a failing run does not trip errexit.
  bundle exec rake test:valgrind 2>&1 | tee -a "$VALGRIND_LOG"
  clean
done
nokogiri-1.6.1/CHANGELOG.rdoc 0000644 0001750 0001750 00000074763 12261213762 015101 0 ustar boutil boutil === 1.6.1 / 2013-12-14
* Bugfixes
* (JRuby) Fix out of memory bug when certain invalid documents are parsed.
* (JRuby) Fix regression of billion-laughs vulnerability. #586
=== 1.6.0 / 2013-06-08
This release was based on v1.5.10 and 1.6.0.rc1, and contains changes
mentioned in both.
* Deprecations
* Remove pre 1.9 monitoring from Travis.
=== 1.6.0.rc1 / 2013-04-14
This release was based on v1.5.9, and so does not contain any fixes
mentioned in the notes for v1.5.10.
* Notes
* mini_portile is now a runtime dependency
* Ruby 1.9.2 and higher now required
* Features
* (MRI) Source code for libxml 2.8.0 and libxslt 1.1.26 is packaged
with the gem. These libraries are compiled at gem install time
unless the environment variable NOKOGIRI_USE_SYSTEM_LIBRARIES is
set. VERSION_INFO (also `nokogiri -v`) exposes whether libxml was
compiled from packaged source, or the system library was used.
* (Windows) libxml upgraded to 2.8.0
* Deprecations
* Support for Ruby 1.8.7 and prior has been dropped
=== 1.5.11 / 2013-11-09
* Bugfixes
* (JRuby) Fix out of memory bug when certain invalid documents are parsed.
* (JRuby) Fix regression of billion-laughs vulnerability. #586
=== 1.5.10 / 2013-06-07
* Bugfixes
* (JRuby) Fix "null document" error when parsing an empty IO in jruby 1.7.3. #883
* (JRuby) Fix schema validation when XSD has DOCTYPE set to DTD. #861 (Thanks, Patrick Cheng!)
* (MRI) Fix segfault when there is no default subelement for an HTML node. #917
* Notes
* Use rb_ary_entry instead of RARRAY_PTR (you know, for Rubinius). #877 (Thanks, Dirkjan Bussink!)
* Fix TypeError when running tests. #900 (Thanks, Cédric Boutillier!)
=== 1.5.9 / 2013-03-21
* Bugfixes
* Ensure that prefixed attributes are properly namespaced when reparented. #869
* Fix for inconsistent namespaced attribute access for SVG nested in HTML. #861
* (MRI) Fixed a memory leak in fragment parsing if nodes are not all subsequently reparented. #856
=== 1.5.8 / 2013-03-19
* Bugfixes
* (JRuby) Fix EmptyStackException thrown by elements with xlink:href attributes and no base_uri #534, #805. (Thanks, Patrick Quinn and Brian Hoffman!)
* Fixes duplicate attributes issue introduced in 1.5.7. #865
* Allow use of a prefixed namespace on a root node using Nokogiri::XML::Builder #868
=== 1.5.7 / 2013-03-18
* Features
* Windows support for Ruby 2.0.
* Bugfixes
* SAX::Parser.parse_io throws an error when used with lower case encoding. #828
* (JRuby) Java Nokogiri is finally green (passes all tests) under 1.8 and 1.9 mode. High five everyone. #798, #705
* (JRuby) Nokogiri::XML::Reader broken (as a pull parser) on jruby - reads the whole XML document. #831
* (JRuby) JRuby hangs parsing "&". #837
* (JRuby) JRuby NPE parsing an invalid XML instruction. #838
* (JRuby) Node#content= incompatibility. #839
* (JRuby) to_xhtml doesn't print the last slash for self-closing tags in JRuby. #834
* (JRuby) Adding an EntityReference after a Text node mangles the entity in JRuby. #835
* (JRuby) JRuby version inconsistency: nil for empty attributes. #818
* CSS queries for classes (e.g., ".foo") now treat all whitespace identically. #854
* Namespace behavior cleaned up and made consistent between JRuby and MRI. #846, #801 (Thanks, Michael Klein!)
* (MRI) SAX parser handles empty processing instructions. #845
=== 1.5.6 / 2012-12-19
* Features
* Improved performance of XML::Document#collect_namespaces. #761 (Thanks, Juergen Mangler!)
* New callback SAX::Document#processing_instruction (Thanks, Kitaiti Makoto!)
* Node#native_content= allows setting unescaped node content. #768
* XPath lookup with namespaces supports symbol keys. #729 (Thanks, Ben Langfeld.)
* XML::Node#[]= stringifies values. #729 (Thanks, Ben Langfeld.)
* bin/nokogiri will process a document from $stdin
* bin/nokogiri -e will execute a program from the command line
* (JRuby) bin/nokogiri --version will print the Xerces and NekoHTML versions.
* Bugfixes
* Nokogiri now detects XSLT transform errors. #731 (Thanks, Justin Fitzsimmons!)
* Don't throw an Error when trying to replace top-level text node in DocumentFragment. #775
* Raise an ArgumentError if an invalid encoding is passed to the SAX parser. #756 (Thanks, Bradley Schaefer!)
* Prefixed element inconsistency between CRuby and JRuby. #712
* (JRuby) space prior to xml preamble causes nokogiri to fail parsing. (fixed along with #748) #790
* (JRuby) Fixed the bug Nokogiri::XML::Node#content inconsistency between Java and C. #794, #797
* (JRuby) raises INVALID_CHARACTER_ERR exception when EntityReference name starts with '#'. #719
* (JRuby) doesn't coerce namespaces out of strings on a direct subclass of Node. #715
* (JRuby) Node#content now renders newlines properly. #737 (Thanks, Piotr Szmielew!)
* (JRuby) Unknown namespaces are ignored when the recover option is used. #748
* (JRuby) XPath queries for namespaces should not throw exceptions when called twice in a row. #764
* (JRuby) More consistent (with libxml2) whitespace formatting when emitting XML. #771
* (JRuby) namespaced attributes broken when appending raw xml to builder. #770
* (JRuby) Nokogiri::XML::Document#wrap raises undefined method `length' for nil:NilClass when trying to << to a node. #781
* (JRuby) Fixed "bad file descriptor" bug when closing open file descriptors. #495
* (JRuby) JRuby/CRuby incompatibility for attribute decorators. #785
* (JRuby) Issues parsing valid XML with no internal subset in the DTD. #547, #811
* (JRuby) Issues parsing valid node content when it contains colons. #728
* (JRuby) Correctly parse the doc type of html documents. #733
* (JRuby) Include dtd in the xml output when a builder is used with create_internal_subset. #751
* (JRuby) builder requires textwrappers for valid utf8 in jruby, not in mri. #784
=== 1.5.5 / 2012-06-24
* Features
* Much-improved support for JRuby in 1.9 mode! Yay!
* Bugfixes
* Regression in JRuby Nokogiri add_previous_sibling (1.5.0 -> 1.5.1) #691 (Thanks, John Shahid!)
* JRuby unable to create HTML doc if URL arg provided #674 (Thanks, John Shahid!)
* JRuby raises NullPointerException when given HTML document is nil or empty string. #699
* JRuby 1.9 error, uncaught throw 'encoding_found', has been fixed. #673
* Invalid encoding returned in JRuby with US-ASCII. #583
* XmlSaxPushParser raises IndexOutOfBoundsException when over 512 characters are given. #567, #615
* When xpath evaluation returns empty NodeSet, decorating NodeSet's base document raises exception. #514
* JRuby raises exception when xpath with namespace is specified. pull request #681 (Thanks, Piotr Szmielew)
* JRuby renders nodes without their namespace when subclassing Node. #695
* JRuby raises NAMESPACE_ERR (org.w3c.dom.DOMException) while instantiating RDF::RDFXML::Writer. #683
* JRuby is not able to use namespaces in xpath. #493
* JRuby's Entity resolving should be consistent with C-Nokogiri #704, #647, #703
=== 1.5.4 / 2012-06-12
* Features
* The "nokogiri" script now has more verbose output when passed the `--rng` option. #675 (Thanks, Dan Radez!)
* Build support on hardened Debian systems that use `-Werror=format-security`. #680.
* Better build support for systems with pkg-config. #584
* Better build support for systems with multiple iconv installations.
* Bugfixes
* Segmentation fault when creating a comment node for a DocumentFragment. #677, #678.
* Treat '.' as xpath in at() and search(). #690
* (MRI, Security) Default parse options for XML documents were
changed to not make network connections during document parsing,
to avoid XXE vulnerability. #693
To re-enable this behavior, the configuration method `nononet` may
be called, like this:
Nokogiri::XML::Document.parse(xml) { |config| config.nononet }
Insert your own joke about double-negatives here.
=== 1.5.3 / 2012-06-01
* Features
* Support for "prefixless" CSS selectors ~, > and + like jQuery
supports. #621, #623. (Thanks, David Lee!)
* Attempting to improve installation on homebrew 0.9 (with regards
to iconv). Isn't package management convenient?
* Bugfixes
* Custom xpath functions with empty nodeset arguments cause a
segfault. #634.
* Nokogiri::XML::Node#css now works for XML documents with default
namespaces when the rule contains attribute selector without
namespace.
* Fixed marshalling bugs around how arguments are passed to (and
returned from) XSLT custom xpath functions. #640.
* Nokogiri::XML::Reader#outer_xml is broken in JRuby #617
* Nokogiri::XML::Attribute on JRuby returns a nil namespace #647
* Nokogiri::XML::Node#namespace= cannot set a namespace without a
prefix on JRuby #648
* (JRuby) 1.9 mode causes dead lock while running rake #571
* HTML::Document#meta_encoding does not raise exception on docs with
malformed content-type. #655
* Fixing segfault related to unsupported encodings in in-context
parsing on 1.8.7. #643
* (JRuby) Concurrency issue in XPath parsing. #682
=== 1.5.2 / 2012-03-09
Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. #631, #632.
=== 1.5.1 / 2012-03-09
* Features
* XML::Builder#comment allows creation of comment nodes.
* CSS searches now support namespaced attributes. #593
* Java integration feature is added. Now, XML::Document.wrap
and XML::Document#to_java methods are available.
* RelaxNG validator support in the `nokogiri` cli utility. #591 (thanks, Dan Radez!)
* Bugfixes
* Fix many memory leaks and segfault opportunities. Thanks, Tim Elliott!
* extconf searches homebrew paths if homebrew is installed.
* Inconsistent behavior of Nokogiri 1.5.0 Java #620
* Inheriting from Nokogiri::XML::Node on JRuby (1.6.4/5) fails #560
* XML::Attr nodes are not allowed to be added as node children, so an
exception is raised. #558
* No longer defensively "pickle" adjacent text nodes on
Node#add_next_sibling and Node#add_previous_sibling calls. #595.
* Java version inconsistency: it returns nil for empty attributes #589
* to_xhtml incorrectly generates <p /></p> when tag is empty #557
* Document#add_child now accepts a Node, NodeSet, DocumentFragment,
or String. #546.
* Document#create_element now recognizes namespaces containing
non-word characters (like "SOAP-ENV"). This is mostly relevant to
users of Builder, which calls Document#create_element for nearly
everything. #531.
* File encoding broken in 1.5.0 / jruby / windows #529
* Java version does not return namespace defs as attrs for ::HTML #542
* Bad file descriptor with Nokogiri 1.5.0 #495
* remove_namespace! doesn't work in pure java version #492
* The Nokogiri Java native build throws a null pointer exception
when ActiveSupport's .blank? method is called directly on a parsed
object. #489
* 1.5.0 Not using correct character encoding #488
* Raw XML string in XML Builder broken on JRuby #486
* Nokogiri 1.5.0 XML generation broken on JRuby #484
* Do not allow multiple root nodes. #550
* Fixes for custom XPath functions. #605, #606 (thanks, Juan Wajnerman!)
* Node#to_xml does not override :save_with if it is provided. #505
* Node#set is a private method (JRuby). #564 (thanks, Nick Sieger!)
* C14n cleanup and Node#canonicalize (thanks, Ivan Pirlik!) #563
=== 1.5.0 / 2011-07-01
* Notes
* See changelog from 1.4.7
* Features
* extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor)
* Bugfixes
* default output of XML on JRuby is no longer formatted due to
inconsistent whitespace handling. #415
* (JRuby) making empty NodeSets with null `nodes` member safe to operate on. #443
* Fix a bug in advanced encoding detection that leads to partially
duplicated document when parsing an HTML file with unknown
encoding.
* Add support for <meta charset>.
=== 1.5.0 beta3 / 2010/12/02
* Notes
* JRuby performance tuning
* See changelog from 1.4.4
* Bugfixes
* Node#inner_text no longer returns nil. (JRuby) #264
=== 1.5.0 beta2 / 2010/07/30
* Notes
* See changelog from 1.4.3
=== 1.5.0 beta1 / 2010/05/22
* Notes
* JRuby support is provided by a new pure-java backend.
* Deprecations
* Ruby 1.8.6 is deprecated. Nokogiri will install, but official support is ended.
* LibXML 2.6.16 and earlier are deprecated. Nokogiri will refuse to install.
* FFI support is removed.
=== 1.4.7 / 2011-07-01
* Bugfixes
* Fix a bug in advanced encoding detection that leads to partially
duplicated document when parsing an HTML file with unknown
encoding. Thanks, Timothy Elliott (@ender672)! #478
=== 1.4.6 / 2011-06-19
* Notes
* This version is functionally identical to 1.4.5.
* Ruby 1.8.6 support has been restored.
=== 1.4.5 / 2011-05-19
* New Features
* Nokogiri::HTML::Document#title accessor gets and sets the document title.
* extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor)
* Raise an exception if a string is passed to Nokogiri::XML::Schema#validate. #406
* Bugfixes
* Node#serialize-and-friends now accepts a SaveOption object as the, erm, save object.
* Nokogiri::CSS::Parser has-a Nokogiri::CSS::Tokenizer
* (JRUBY+FFI only) Weak references are now threadsafe. #355
* Make direct start_element() callback (currently used for
HTML::SAX::Parser) pass attributes in assoc array, just as
emulated start_element() callback does. rel. #356
* HTML::SAX::Parser should call back a block given to parse*() if any, just as XML::SAX::Parser does.
* Add further encoding detection to HTML parser that libxml2 does not do.
* Document#remove_namespaces! now handles attributes with namespaces. #396
* XSLT::Stylesheet#transform no longer segfaults when handed a non-XML::Document. #452
* XML::Reader no longer segfaults when under GC pressure. #439
=== 1.4.4 / 2010-11-15
* New Features
* XML::Node#children= sets the node's inner html (much like #inner_html=), but returns the reparent node(s).
* XSLT supports function extensions. #336
* XPath bind parameter substitution. #329
* XML::Reader node type constants. #369
* SAX Parser context provides line and column information
* Bugfixes
* XML::DTD#attributes returns an empty hash instead of nil when there are no attributes.
* XML::DTD#{keys,each} now work as expected. #324
* {XML,HTML}::DocumentFragment.{new,parse} no longer strip leading and trailing whitespace. #319
* XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace} return a NodeSet when passed a string.
* Unclosed tags parsed more robustly in fragments. #315
* XML::Node#{replace,add_previous_sibling,add_next_sibling} edge cases fixed related to libxml's text node merging. #308
* Fixed a segfault when GC occurs during xpath handler argument marshalling. #345
* Added hack to Slop decorator to work with previously defined methods. #330
* Fix a memory leak when duplicating child nodes. #353
* Fixed off-by-one bug with nth-last-{child,of-type} CSS selectors when NOT using an+b notation. #354
* Fixed passing of non-namespace attributes to SAX::Document#start_element. #356
* Workaround for libxml2 in-context parsing bug. #362
* Fixed NodeSet#wrap on nodes within a fragment. #331
=== 1.4.3 / 2010/07/28
* New Features
* XML::Reader#empty_element? returns true for empty elements. #262
* Node#remove_namespaces! now removes namespace *declarations* as well. #294
* NodeSet#at_xpath, NodeSet#at_css and NodeSet#> do what the corresponding
methods of Node do.
* Bugfixes
* XML::NodeSet#{include?,delete,push} accept an XML::Namespace
* XML::Document#parse added for parsing in the context of a document
* XML::DocumentFragment#inner_html= works with contextual parsing! #298, #281
* lib/nokogiri/css/parser.y Combined CSS functions + pseudo selectors fixed
* Reparenting text nodes is safe, even when the operation frees adjacent merged nodes. #283
* Fixed libxml2 versionitis issue with xmlFirstElementChild et al. #303
* XML::Attr#add_namespace now works as expected. #252
* HTML::DocumentFragment uses the string's encoding. #305
* Fix the CSS3 selector translation rule for the general sibling combinator
(a.k.a. preceding selector) that incorrectly converted "E ~ F G" to
"//F//G[preceding-sibling::E]".
=== 1.4.2 / 2010/05/22
* New Features
* XML::Node#parse will parse XML or HTML fragments with respect to the
context node.
* XML::Node#namespaces returns all namespaces defined in the node and all
ancestor nodes
(previously did not return ancestors' namespace definitions).
* Added Enumerable to XML::Node
* Nokogiri::XML::Schema#validate now uses xmlSchemaValidateFile if a
filename is passed, which is faster and more memory-efficient. GH #219
* XML::Document#create_entity will create new EntityDecl objects. GH #174
* JRuby FFI implementation no longer uses ObjectSpace._id2ref,
instead using Charles Nutter's rocking Weakling gem.
* Nokogiri::XML::Node#first_element_child fetch the first child node that is
an ELEMENT node.
* Nokogiri::XML::Node#last_element_child fetch the last child node that is
an ELEMENT node.
* Nokogiri::XML::Node#elements fetch all children nodes that are ELEMENT
nodes.
* Nokogiri::XML::Node#add_child, #add_previous_sibling, #before,
#add_next_sibling, #after, #inner_html, #swap and #replace all now
accept a Node, DocumentFragment, NodeSet, or a string containing
markup.
* Node#fragment? indicates whether a node is a DocumentFragment.
* Bugfixes
* XML::NodeSet is now always decorated (if the document has decorators).
GH #198
* XML::NodeSet#slice gracefully handles offset+length larger than the set
length. GH #200
* XML::Node#content= safely unlinks previous content. GH #203
* XML::Node#namespace= takes nil as a parameter
* XML::Node#xpath returns things other than NodeSet objects. GH #208
* XSLT::StyleSheet#transform accepts hashes for parameters. GH #223
* Pseudo selectors inside not() work. GH #205
* XML::Builder doesn't break when nodes are unlinked.
Thanks to vihai! GH #228
* Encoding can be forced on the SAX parser. Thanks Eugene Pimenov! GH #204
* XML::DocumentFragment uses XML::Node#parse to determine children.
* Fixed a memory leak in xml reader. Thanks sdor! GH #244
* Node#replace returns the new child node as claimed in the
RDoc. Previously returned +self+.
* Notes
* The Windows gems now bundle DLLs for libxml 2.7.6 and libxslt
1.1.26. Prior to this release, libxml 2.7.3 and libxslt 1.1.24
were bundled.
=== 1.4.1 / 2009/12/10
* New Features
* Added Nokogiri::LIBXML_ICONV_ENABLED
* Alias Node#[] to Node#attr
* XML::Node#next_element added
* XML::Node#> added for searching a node's immediate children
* XML::NodeSet#reverse added
* Added fragment support to Node#add_child, Node#add_next_sibling,
Node#add_previous_sibling, and Node#replace.
* XML::Node#previous_element implemented
* Rubinius support
* The CSS selector engine now supports :has()
* XML::NodeSet#filter() was added
* XML::Node.next= and .previous= are aliases for add_next_sibling and add_previous_sibling. GH #183
* Bugfixes
* XML fragments with namespaces do not raise an exception (regression in 1.4.0)
* Node#matches? works in nodes contained by a DocumentFragment. GH #158
* Document should not define add_namespace() method. GH #169
* XPath queries returning namespace declarations do not segfault.
* Node#replace works with nodes from different documents. GH #162
* Adding XML::Document#collect_namespaces
* Fixed bugs in the SOAP4R adapter
* Fixed bug in XML::Node#next_element for certain edge cases
* Fixed load path issue with JRuby under Windows. GH #160.
* XSLT#apply_to will honor the "output method". Thanks richardlehane!
* Fragments containing leading text nodes with newlines now parse properly. GH #178.
=== 1.4.0 / 2009/10/30
* Happy Birthday!
* New Features
* Node#at_xpath returns the first element of the NodeSet matching the XPath
expression.
* Node#at_css returns the first element of the NodeSet matching the CSS
selector.
* NodeSet#| for unions GH #119 (Thanks Serabe!)
* NodeSet#inspect makes prettier output
* Node#inspect implemented for more rubyish document inspecting
* Added XML::DTD#external_id
* Added XML::DTD#system_id
* Added XML::ElementContent for DTD Element content validity
* Better namespace declaration support in Nokogiri::XML::Builder
* Added XML::Node#external_subset
* Added XML::Node#create_external_subset
* Added XML::Node#create_internal_subset
* XML Builder can append raw strings (GH #141, patch from dudleyf)
* XML::SAX::ParserContext added
* XML::Document#remove_namespaces! for the namespace-impaired
* Bugfixes
* returns nil when HTML documents do not declare a meta encoding tag. GH #115
* Uses RbConfig::CONFIG['host_os'] to adjust ENV['PATH'] GH #113
* NodeSet#search is more efficient GH #119 (Thanks Serabe!)
* NodeSet#xpath handles custom xpath functions
* Fixing a SEGV when XML::Reader gets attributes for current node
* Node#inner_html takes the same arguments as Node#to_html GH #117
* DocumentFragment#css delegates to its child nodes GH #123
* NodeSet#[] works with slices larger than NodeSet#length GH #131
* Reparented nodes maintain their namespace. GH #134
* Fixed SEGV when adding an XML::Document to NodeSet
* XML::SyntaxError can be duplicated. GH #148
* Deprecations
* Hpricot compatibility layer removed
=== 1.3.3 / 2009/07/26
* New Features
* NodeSet#children returns all children of all nodes
* Bugfixes
* Override libxml-ruby's global error handler
* ParseOption#strict fixed
* Fixed a segfault when sending an empty string to Node#inner_html= GH #88
* String encoding is now set to UTF-8 in Ruby 1.9
* Fixed a segfault when moving root nodes between documents. GH #91
* Fixed an O(n) penalty on node creation. GH #101
* Allowing XML documents to be output as HTML documents
* Deprecations
* Hpricot compatibility layer will be removed in 1.4.0
=== 1.3.2 / 2009-06-22
* New Features
* Nokogiri::XML::DTD#validate will validate your document
* Bugfixes
* Nokogiri::XML::NodeSet#search will search top level nodes. GH #73
* Removed namespace related methods from Nokogiri::XML::Document
* Fixed a segfault when a namespace was added twice
* Made nokogiri work with Snow Leopard GH #79
* Mailing list has moved to: http://groups.google.com/group/nokogiri-talk
* HTML fragments now correctly handle comments and CDATA blocks. GH #78
* Nokogiri::XML::Document#clone is now an alias of dup
* Deprecations
* Nokogiri::XML::SAX::Document#start_element_ns is deprecated, please switch
to Nokogiri::XML::SAX::Document#start_element_namespace
* Nokogiri::XML::SAX::Document#end_element_ns is deprecated, please switch
to Nokogiri::XML::SAX::Document#end_element_namespace
=== 1.3.1 / 2009-06-07
* Bugfixes
* extconf.rb checks for optional RelaxNG and Schema functions
* Namespace nodes are added to the Document node cache
=== 1.3.0 / 2009-05-30
* New Features
* Builder changes scope based on block arity
* Builder supports methods ending in underscore similar to tagz
* Nokogiri::XML::Node#<=> compares nodes based on Document position
* Nokogiri::XML::Node#matches? returns true if Node can be found with
given selector.
* Nokogiri::XML::Node#ancestors now returns a Nokogiri::XML::NodeSet
* Nokogiri::XML::Node#ancestors will match parents against optional selector
* Nokogiri::HTML::Document#meta_encoding for getting the meta encoding
* Nokogiri::HTML::Document#meta_encoding= for setting the meta encoding
* Nokogiri::XML::Document#encoding= to set the document encoding
* Nokogiri::XML::Schema for validating documents against XSD schema
* Nokogiri::XML::RelaxNG for validating documents against RelaxNG schema
* Nokogiri::HTML::ElementDescription for fetching HTML element descriptions
* Nokogiri::XML::Node#description to fetch the node description
* Nokogiri::XML::Node#accept implements Visitor pattern
* bin/nokogiri for easily examining documents (Thanks Yutaka HARA!)
* Nokogiri::XML::NodeSet now supports more Array and Enumerable operators:
index, delete, slice, - (difference), + (concatenation), & (intersection),
push, pop, shift, ==
* Nokogiri.XML, Nokogiri.HTML take blocks that receive
Nokogiri::XML::ParseOptions objects
* Nokogiri::XML::Node#namespace returns a Nokogiri::XML::Namespace
* Nokogiri::XML::Node#namespace= for setting a node's namespace
* Nokogiri::XML::DocumentFragment and Nokogiri::HTML::DocumentFragment
have a sensible API and a more robust implementation.
* JRuby 1.3.0 support via FFI.
* Bugfixes
* Fixed a problem with nil passed to CDATA constructor
* Fragment method deals with regular expression characters
(Thanks Joel!) LH #73
* Fixing builder scope issues LH #61, LH #74, LH #70
* Fixed a problem when adding a child could remove the child namespace LH#78
* Fixed bug with unlinking a node then reparenting it. (GH#22)
* Fixed failure to catch errors during XSLT parsing (GH#32)
* Fixed a bug with attribute conditions in CSS selectors (GH#36)
* Fixed intolerance of HTML attributes without values in Node#before/after/inner_html=. (GH#35)
=== 1.2.3 / 2009-03-22
* Bugfixes
* Fixing bug where a node is passed in to Node#new
* Namespace should be assigned on DocumentFragment creation. LH #66
* Nokogiri::XML::NodeSet#dup works GH #10
* Nokogiri::HTML returns an empty Document when given a blank string GH#11
* Adding a child will remove duplicate namespace declarations LH #67
* Builder methods take a hash as a second argument
=== 1.2.2 / 2009-03-14
* New features
* Nokogiri may be used with soap4r. See XSD::XMLParser::Nokogiri
* Nokogiri::XML::Node#inner_html= to set the inner html for a node
* Nokogiri builder interface improvements
* Nokogiri::XML::Node#swap swaps html for current node (LH #50)
* Bugfixes
* Fixed a tag nesting problem in the Builder API (LH #41)
* Nokogiri::HTML.fragment will properly handle text only nodes (LH #43)
* Nokogiri::XML::Node#before will prepend text nodes (LH #44)
* Nokogiri::XML::Node#after will append text nodes
* Nokogiri::XML::Node#search automatically registers root namespaces (LH #42)
* Nokogiri::XML::NodeSet#search automatically registers namespaces
* Nokogiri::HTML::NamedCharacters delegates to libxml2
* Nokogiri::XML::Node#[] can take a symbol (LH #48)
* vasprintf for windows updated. Thanks Geoffroy Couprie!
* Nokogiri::XML::Node#[]= should not encode entities (LH #55)
* Namespaces should be copied to reparented nodes (LH #56)
* Nokogiri uses encoding set on the string for default in Ruby 1.9
* Document#dup should create a new document of the same type (LH #59)
* Document should not have a parent method (LH #64)
=== 1.2.1 / 2009-02-23
* Bugfixes
* Fixed a CSS selector space bug
* Fixed Ruby 1.9 String Encoding (Thanks 角谷さん!)
=== 1.2.0 / 2009-02-22
* New features
* CSS search now supports CSS3 namespace queries
* Namespaces on the root node are automatically registered
* CSS queries use the default namespace
* Nokogiri::XML::Document#encoding get encoding used for this document
* Nokogiri::XML::Document#url get the document url
* Nokogiri::XML::Node#add_namespace add a namespace to the node LH#38
* Nokogiri::XML::Node#each iterate over attribute name, value pairs
* Nokogiri::XML::Node#keys get all attribute names
* Nokogiri::XML::Node#line get the line number for a node (Thanks Dirkjan Bussink!)
* Nokogiri::XML::Node#serialize now takes an optional encoding parameter
* Nokogiri::XML::Node#to_html, to_xml, and to_xhtml take an optional encoding
* Nokogiri::XML::Node#to_str
* Nokogiri::XML::Node#to_xhtml to produce XHTML documents
* Nokogiri::XML::Node#values get all attribute values
* Nokogiri::XML::Node#write_to writes the node to an IO object with optional encoding
* Nokogiri::XML::ProcessingInstruction.new
* Nokogiri::XML::SAX::PushParser for all your push parsing needs.
* Bugfixes
* Fixed Nokogiri::XML::Document#dup
* Fixed header detection. Thanks rubikitch!
* Fixed a problem where invalid CSS would cause the parser to hang
* Deprecations
* Nokogiri::XML::Node.new_from_str will be deprecated in 1.3.0
* API Changes
* Nokogiri::HTML.fragment now returns an XML::DocumentFragment (LH #32)
=== 1.1.1
* New features
* Added XML::Node#elem?
* Added XML::Node#attribute_nodes
* Added XML::Attr
* XML::Node#delete added.
* XML::NodeSet#inner_html added.
* Bugfixes
* Not including an HTML entity for \r for HTML nodes.
* Removed CSS::SelectorHandler and XML::XPathHandler
* XML::Node#attributes returns an Attr node for the value.
* XML::NodeSet implements to_xml
=== 1.1.0
* New Features
* Custom XPath functions are now supported. See Nokogiri::XML::Node#xpath
* Custom CSS pseudo classes are now supported. See Nokogiri::XML::Node#css
* Nokogiri::XML::Node#<< will add a child to the current node
* Bugfixes
* Mutex lock on CSS cache access
* Fixed build problems with GCC 3.3.5
* XML::Node#to_xml now takes an indentation argument
* XML::Node#dup takes an optional depth argument
* XML::Node#add_previous_sibling returns new sibling node.
=== 1.0.7
* Bugfixes
* Fixed memory leak when using Dike
* SAX parser now parses IO streams
* Comment nodes have their own class
* Nokogiri() should delegate to Nokogiri.parse()
* Prepending rather than appending to ENV['PATH'] on windows
* Fixed a bug in complex CSS negation selectors
=== 1.0.6
* 5 Bugfixes
* XPath Parser raises a SyntaxError on parse failure
* CSS Parser raises a SyntaxError on parse failure
* filter() and not() hpricot compatibility added
* CSS searches via Node#search are now always relative
* CSS to XPath conversion is now cached
=== 1.0.5
* Bugfixes
* Added mailing list and ticket tracking information to the README.txt
* Sets ENV['PATH'] on windows if it doesn't exist
* Caching results of NodeSet#[] on Document
=== 1.0.4
* Bugfixes
* Changed memory management from weak refs to document refs
* Plugged some memory leaks
* Builder blocks can call methods from surrounding contexts
=== 1.0.3
* 5 Bugfixes
* NodeSet now implements to_ary
* XML::Document should not implement parent
* More GC Bugs fixed. (Mike is AWESOME!)
* Removed RARRAY_LEN for 1.8.5 compatibility. Thanks Shane Hanna.
* inner_html fixed. (Thanks Yehuda!)
=== 1.0.2
* 1 Bugfix
* extconf.rb should not check for frex and racc
=== 1.0.1
* 1 Bugfix
* Made sure extconf.rb searched libdir and prefix so that ports libxml/ruby
will link properly. Thanks lucsky!
=== 1.0.0 / 2008-07-13
* 1 major enhancement
* Birthday!
nokogiri-1.6.1/.travis.yml 0000644 0001750 0001750 00000000572 12261213762 015035 0 ustar boutil boutil language: ruby
rvm:
- 1.9.2
- 1.9.3
- ruby-head
- ree
- jruby-19mode
- rbx-19mode
jdk:
- openjdk7
- openjdk6
matrix:
allow_failures:
- rvm: rbx-19mode
exclude:
- rvm: 1.9.2
jdk: openjdk7
- rvm: 1.9.3
jdk: openjdk7
- rvm: ruby-head
jdk: openjdk7
- rvm: ree
jdk: openjdk7
- rvm: rbx-19mode
jdk: openjdk7
nokogiri-1.6.1/README.rdoc 0000644 0001750 0001750 00000012314 12261213762 014527 0 ustar boutil boutil = Nokogiri {
}[http://travis-ci.org/sparklemotion/nokogiri] {
}[https://codeclimate.com/github/sparklemotion/nokogiri]
* http://nokogiri.org
* http://github.com/sparklemotion/nokogiri/wikis
* http://github.com/sparklemotion/nokogiri/tree/master
* http://groups.google.com/group/nokogiri-talk
* http://github.com/sparklemotion/nokogiri/issues
== DESCRIPTION:
Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among Nokogiri's
many features is the ability to search documents via XPath or CSS3 selectors.
XML is like violence - if it doesn’t solve your problems, you are not using
enough of it.
== FEATURES:
* XPath support for document searching
* CSS3 selector support for document searching
* XML/HTML builder
Nokogiri parses and searches XML/HTML very quickly, and also has
correctly implemented CSS3 selector support as well as XPath support.
== SUPPORT:
Before filing a bug report, please read our {submission guidelines}[http://nokogiri.org/tutorials/getting_help.html] at:
* http://nokogiri.org/tutorials/getting_help.html
The Nokogiri {mailing list}[http://groups.google.com/group/nokogiri-talk]
is available here:
* http://groups.google.com/group/nokogiri-talk
The {bug tracker}[http://github.com/sparklemotion/nokogiri/issues]
is available here:
* http://github.com/sparklemotion/nokogiri/issues
The IRC channel is #nokogiri on freenode.
== SYNOPSIS:
require 'nokogiri'
require 'open-uri'
# Get a Nokogiri::HTML::Document for the page we’re interested in...
doc = Nokogiri::HTML(open('http://www.google.com/search?q=sparklemotion'))
# Do funky things with it using Nokogiri::XML::Node methods...
####
# Search for nodes by css
doc.css('h3.r a').each do |link|
puts link.content
end
####
# Search for nodes by xpath
doc.xpath('//h3/a').each do |link|
puts link.content
end
####
# Or mix and match.
doc.search('h3.r a.l', '//h3/a').each do |link|
puts link.content
end
== REQUIREMENTS:
* ruby 1.8 or 1.9
* libxml2
* libxml2-dev
* libxslt
* libxslt-dev
== ENCODING:
Strings are always stored as UTF-8 internally. Methods that return
text values will always return UTF-8 encoded strings. Methods that
return XML (like to_xml, to_html and inner_html) will return a string
encoded like the source document.
*WARNING*
Some documents declare one particular encoding, but use a different
one. So, which encoding should the parser choose?
Remember that data is just a stream of bytes. Only we humans add
meaning to that stream. Any particular set of bytes could be valid
characters in multiple encodings, so detecting encoding with 100%
accuracy is not possible. libxml2 does its best, but it can't be right
100% of the time.
If you want Nokogiri to handle the document encoding properly, your
best bet is to explicitly set the encoding. Here is an example of
explicitly setting the encoding to EUC-JP on the parser:
doc = Nokogiri.XML('', nil, 'EUC-JP')
== INSTALL:
* sudo gem install nokogiri
=== Binary packages
Binary packages are available for:
* SuSE[http://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/]
* Fedora[http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756]
== DEVELOPMENT:
=== Developing on C Ruby (MRI)
Developing Nokogiri requires racc and rexical to generate the parser and
tokenizer. To start development, make sure you have `libxml2` and `libxslt`
installed.
Then install core gems and bootstrap:
$ gem install hoe rake-compiler mini_portile
$ rake newb
=== Developing on JRuby
Currently, development with JRuby depends on CRuby being installed. With
CRuby, install racc and rexical:
$ gem install racc rexical
Make sure hoe and rake-compiler are installed with JRuby:
$ jgem install hoe rake-compiler
Then run rake:
$ jruby -S rake
== LICENSE:
(The MIT License)
Copyright (c) 2008 - 2012:
* {Aaron Patterson}[http://tenderlovemaking.com]
* {Mike Dalessio}[http://mike.daless.io]
* {Charles Nutter}[http://blog.headius.com]
* {Sergio Arbeo}[http://www.serabe.com]
* {Patrick Mahoney}[http://polycrystal.org]
* {Yoko Harada}[http://yokolet.blogspot.com]
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
nokogiri-1.6.1/C_CODING_STYLE.rdoc 0000644 0001750 0001750 00000001152 12261213762 015755 0 ustar boutil boutil = C/C++ mode style for Nokogiri
Please don't propose commits that only change whitespace. However, if your
commit touches a function or section that is not using MRI Ruby conventions,
feel free to update whitespace in the surrounding code.
= WHITESPACE:
* indent level: 2
* indent type: Always spaces
* line breaks: LF
This style can be automatically applied by running:
astyle --indent=spaces=2 --style=1tbs --keep-one-line-blocks $(ack -f --type=cpp --type=cc ext/nokogiri)
= FUNCTION DECLARATION:
ANSI C style:
type name(args)
{
declarations
code
}
= SOURCES:
* <3<3<3
nokogiri-1.6.1/ROADMAP.md 0000644 0001750 0001750 00000006001 12261213762 014322 0 ustar boutil boutil # Roadmap for 2.0
## overhaul serialize/pretty printing API
* https://github.com/sparklemotion/nokogiri/issues/530
XHTML formatting can't be turned off
* https://github.com/sparklemotion/nokogiri/issues/415
XML formatting should be no formatting
## overhaul and optimize the SAX parsing
* see fairy wing throwdown - SAX parsing is wicked slow.
## Node should not be Enumerable; and should have a better attributes API
* https://github.com/sparklemotion/nokogiri/issues/679
Mixing in Enumerable has some unintended consequences; plus we want to improve the attributes API
* Some ideas for a better attributes API?
* (closed) https://github.com/sparklemotion/nokogiri/issues/666
* https://github.com/sparklemotion/nokogiri/issues/765
## improve CSS query parsing
* https://github.com/sparklemotion/nokogiri/issues/528
support `:not()` with a nontrivial argument, like `:not(div p.c)`
* https://github.com/sparklemotion/nokogiri/issues/451
chained :not pseudoselectors
* better jQuery selector and CSS pseudo-selector support:
* https://github.com/sparklemotion/nokogiri/issues/621
* https://github.com/sparklemotion/nokogiri/issues/342
* https://github.com/sparklemotion/nokogiri/issues/628
* https://github.com/sparklemotion/nokogiri/issues/652
* https://github.com/sparklemotion/nokogiri/issues/688
* https://github.com/sparklemotion/nokogiri/issues/394
nth-of-type is wrong, and possibly other selectors as well
* https://github.com/sparklemotion/nokogiri/issues/309
incorrect query being executed
* https://github.com/sparklemotion/nokogiri/issues/350
:has is wrong?
## DocumentFragment
* there are a few tickets about searches not working properly if you
use or do not use the context node as part of the search.
- https://github.com/sparklemotion/nokogiri/issues/213
- https://github.com/sparklemotion/nokogiri/issues/370
- https://github.com/sparklemotion/nokogiri/issues/454
- https://github.com/sparklemotion/nokogiri/issues/572
## Better Syntax for custom XPath function handler
* https://github.com/sparklemotion/nokogiri/pull/464
## Better Syntax around Node#xpath and NodeSet#xpath
* look at those methods, and use of Node#extract_params in Node#{css,search}
* we should standardize on a hash of options for these and other calls
* what should NodeSet#xpath return?
* https://github.com/sparklemotion/nokogiri/issues/656
* also, clean up or unify the implementations of #xpath-and-friends in Node and NodeSet
* implementations are very similar, but no shared code :(
* decorate nodes in a consistent manner
## Encoding
We have a lot of issues open around encoding. How bad are things?
Would it help if we deprecated support for Ruby 1.8.7? Somebody who
knows encoding well should head this up.
* Extract EncodingReader as a real object that can be injected
https://groups.google.com/forum/#!msg/nokogiri-talk/arJeAtMqvkg/tGihB-iBRSAJ
## Reader
It's fundamentally broken, in that we can't stop people from crashing
their application if they want to use object references unsafely.
nokogiri-1.6.1/Manifest.txt 0000644 0001750 0001750 00000023451 12261213762 015234 0 ustar boutil boutil .autotest
.gemtest
.travis.yml
CHANGELOG.ja.rdoc
CHANGELOG.rdoc
C_CODING_STYLE.rdoc
Gemfile
Manifest.txt
README.ja.rdoc
README.rdoc
ROADMAP.md
Rakefile
STANDARD_RESPONSES.md
Y_U_NO_GEMSPEC.md
bin/nokogiri
build_all
dependencies.yml
ext/java/nokogiri/EncodingHandler.java
ext/java/nokogiri/HtmlDocument.java
ext/java/nokogiri/HtmlElementDescription.java
ext/java/nokogiri/HtmlEntityLookup.java
ext/java/nokogiri/HtmlSaxParserContext.java
ext/java/nokogiri/NokogiriService.java
ext/java/nokogiri/XmlAttr.java
ext/java/nokogiri/XmlAttributeDecl.java
ext/java/nokogiri/XmlCdata.java
ext/java/nokogiri/XmlComment.java
ext/java/nokogiri/XmlDocument.java
ext/java/nokogiri/XmlDocumentFragment.java
ext/java/nokogiri/XmlDtd.java
ext/java/nokogiri/XmlElement.java
ext/java/nokogiri/XmlElementContent.java
ext/java/nokogiri/XmlElementDecl.java
ext/java/nokogiri/XmlEntityDecl.java
ext/java/nokogiri/XmlEntityReference.java
ext/java/nokogiri/XmlNamespace.java
ext/java/nokogiri/XmlNode.java
ext/java/nokogiri/XmlNodeSet.java
ext/java/nokogiri/XmlProcessingInstruction.java
ext/java/nokogiri/XmlReader.java
ext/java/nokogiri/XmlRelaxng.java
ext/java/nokogiri/XmlSaxParserContext.java
ext/java/nokogiri/XmlSaxPushParser.java
ext/java/nokogiri/XmlSchema.java
ext/java/nokogiri/XmlSyntaxError.java
ext/java/nokogiri/XmlText.java
ext/java/nokogiri/XmlXpathContext.java
ext/java/nokogiri/XsltStylesheet.java
ext/java/nokogiri/internals/ClosedStreamException.java
ext/java/nokogiri/internals/HtmlDomParserContext.java
ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java
ext/java/nokogiri/internals/NokogiriDocumentCache.java
ext/java/nokogiri/internals/NokogiriDomParser.java
ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java
ext/java/nokogiri/internals/NokogiriEntityResolver.java
ext/java/nokogiri/internals/NokogiriErrorHandler.java
ext/java/nokogiri/internals/NokogiriHandler.java
ext/java/nokogiri/internals/NokogiriHelpers.java
ext/java/nokogiri/internals/NokogiriNamespaceCache.java
ext/java/nokogiri/internals/NokogiriNamespaceContext.java
ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java
ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java
ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java
ext/java/nokogiri/internals/NokogiriXPathFunction.java
ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java
ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java
ext/java/nokogiri/internals/NokogiriXsltErrorListener.java
ext/java/nokogiri/internals/ParserContext.java
ext/java/nokogiri/internals/ReaderNode.java
ext/java/nokogiri/internals/SaveContextVisitor.java
ext/java/nokogiri/internals/SchemaErrorHandler.java
ext/java/nokogiri/internals/UncloseableInputStream.java
ext/java/nokogiri/internals/XmlDeclHandler.java
ext/java/nokogiri/internals/XmlDomParserContext.java
ext/java/nokogiri/internals/XmlSaxParser.java
ext/java/nokogiri/internals/XsltExtensionFunction.java
ext/nokogiri/depend
ext/nokogiri/extconf.rb
ext/nokogiri/html_document.c
ext/nokogiri/html_document.h
ext/nokogiri/html_element_description.c
ext/nokogiri/html_element_description.h
ext/nokogiri/html_entity_lookup.c
ext/nokogiri/html_entity_lookup.h
ext/nokogiri/html_sax_parser_context.c
ext/nokogiri/html_sax_parser_context.h
ext/nokogiri/html_sax_push_parser.c
ext/nokogiri/html_sax_push_parser.h
ext/nokogiri/nokogiri.c
ext/nokogiri/nokogiri.h
ext/nokogiri/xml_attr.c
ext/nokogiri/xml_attr.h
ext/nokogiri/xml_attribute_decl.c
ext/nokogiri/xml_attribute_decl.h
ext/nokogiri/xml_cdata.c
ext/nokogiri/xml_cdata.h
ext/nokogiri/xml_comment.c
ext/nokogiri/xml_comment.h
ext/nokogiri/xml_document.c
ext/nokogiri/xml_document.h
ext/nokogiri/xml_document_fragment.c
ext/nokogiri/xml_document_fragment.h
ext/nokogiri/xml_dtd.c
ext/nokogiri/xml_dtd.h
ext/nokogiri/xml_element_content.c
ext/nokogiri/xml_element_content.h
ext/nokogiri/xml_element_decl.c
ext/nokogiri/xml_element_decl.h
ext/nokogiri/xml_encoding_handler.c
ext/nokogiri/xml_encoding_handler.h
ext/nokogiri/xml_entity_decl.c
ext/nokogiri/xml_entity_decl.h
ext/nokogiri/xml_entity_reference.c
ext/nokogiri/xml_entity_reference.h
ext/nokogiri/xml_io.c
ext/nokogiri/xml_io.h
ext/nokogiri/xml_libxml2_hacks.c
ext/nokogiri/xml_libxml2_hacks.h
ext/nokogiri/xml_namespace.c
ext/nokogiri/xml_namespace.h
ext/nokogiri/xml_node.c
ext/nokogiri/xml_node.h
ext/nokogiri/xml_node_set.c
ext/nokogiri/xml_node_set.h
ext/nokogiri/xml_processing_instruction.c
ext/nokogiri/xml_processing_instruction.h
ext/nokogiri/xml_reader.c
ext/nokogiri/xml_reader.h
ext/nokogiri/xml_relax_ng.c
ext/nokogiri/xml_relax_ng.h
ext/nokogiri/xml_sax_parser.c
ext/nokogiri/xml_sax_parser.h
ext/nokogiri/xml_sax_parser_context.c
ext/nokogiri/xml_sax_parser_context.h
ext/nokogiri/xml_sax_push_parser.c
ext/nokogiri/xml_sax_push_parser.h
ext/nokogiri/xml_schema.c
ext/nokogiri/xml_schema.h
ext/nokogiri/xml_syntax_error.c
ext/nokogiri/xml_syntax_error.h
ext/nokogiri/xml_text.c
ext/nokogiri/xml_text.h
ext/nokogiri/xml_xpath_context.c
ext/nokogiri/xml_xpath_context.h
ext/nokogiri/xslt_stylesheet.c
ext/nokogiri/xslt_stylesheet.h
lib/isorelax.jar
lib/jing.jar
lib/nekodtd.jar
lib/nekohtml.jar
lib/nokogiri.rb
lib/nokogiri/css.rb
lib/nokogiri/css/node.rb
lib/nokogiri/css/parser.rb
lib/nokogiri/css/parser.y
lib/nokogiri/css/parser_extras.rb
lib/nokogiri/css/syntax_error.rb
lib/nokogiri/css/tokenizer.rb
lib/nokogiri/css/tokenizer.rex
lib/nokogiri/css/xpath_visitor.rb
lib/nokogiri/decorators/slop.rb
lib/nokogiri/html.rb
lib/nokogiri/html/builder.rb
lib/nokogiri/html/document.rb
lib/nokogiri/html/document_fragment.rb
lib/nokogiri/html/element_description.rb
lib/nokogiri/html/element_description_defaults.rb
lib/nokogiri/html/entity_lookup.rb
lib/nokogiri/html/sax/parser.rb
lib/nokogiri/html/sax/parser_context.rb
lib/nokogiri/html/sax/push_parser.rb
lib/nokogiri/syntax_error.rb
lib/nokogiri/version.rb
lib/nokogiri/xml.rb
lib/nokogiri/xml/attr.rb
lib/nokogiri/xml/attribute_decl.rb
lib/nokogiri/xml/builder.rb
lib/nokogiri/xml/cdata.rb
lib/nokogiri/xml/character_data.rb
lib/nokogiri/xml/document.rb
lib/nokogiri/xml/document_fragment.rb
lib/nokogiri/xml/dtd.rb
lib/nokogiri/xml/element_content.rb
lib/nokogiri/xml/element_decl.rb
lib/nokogiri/xml/entity_decl.rb
lib/nokogiri/xml/namespace.rb
lib/nokogiri/xml/node.rb
lib/nokogiri/xml/node/save_options.rb
lib/nokogiri/xml/node_set.rb
lib/nokogiri/xml/notation.rb
lib/nokogiri/xml/parse_options.rb
lib/nokogiri/xml/pp.rb
lib/nokogiri/xml/pp/character_data.rb
lib/nokogiri/xml/pp/node.rb
lib/nokogiri/xml/processing_instruction.rb
lib/nokogiri/xml/reader.rb
lib/nokogiri/xml/relax_ng.rb
lib/nokogiri/xml/sax.rb
lib/nokogiri/xml/sax/document.rb
lib/nokogiri/xml/sax/parser.rb
lib/nokogiri/xml/sax/parser_context.rb
lib/nokogiri/xml/sax/push_parser.rb
lib/nokogiri/xml/schema.rb
lib/nokogiri/xml/syntax_error.rb
lib/nokogiri/xml/text.rb
lib/nokogiri/xml/xpath.rb
lib/nokogiri/xml/xpath/syntax_error.rb
lib/nokogiri/xml/xpath_context.rb
lib/nokogiri/xslt.rb
lib/nokogiri/xslt/stylesheet.rb
lib/xercesImpl.jar
lib/xsd/xmlparser/nokogiri.rb
tasks/cross_compile.rb
tasks/nokogiri.org.rb
tasks/test.rb
test/css/test_nthiness.rb
test/css/test_parser.rb
test/css/test_tokenizer.rb
test/css/test_xpath_visitor.rb
test/decorators/test_slop.rb
test/files/2ch.html
test/files/address_book.rlx
test/files/address_book.xml
test/files/bar/bar.xsd
test/files/bogus.xml
test/files/dont_hurt_em_why.xml
test/files/encoding.html
test/files/encoding.xhtml
test/files/exslt.xml
test/files/exslt.xslt
test/files/foo/foo.xsd
test/files/metacharset.html
test/files/noencoding.html
test/files/po.xml
test/files/po.xsd
test/files/saml/saml20assertion_schema.xsd
test/files/saml/saml20protocol_schema.xsd
test/files/saml/xenc_schema.xsd
test/files/saml/xmldsig_schema.xsd
test/files/shift_jis.html
test/files/shift_jis.xml
test/files/snuggles.xml
test/files/staff.dtd
test/files/staff.xml
test/files/staff.xslt
test/files/test_document_url/bar.xml
test/files/test_document_url/document.dtd
test/files/test_document_url/document.xml
test/files/tlm.html
test/files/to_be_xincluded.xml
test/files/valid_bar.xml
test/files/xinclude.xml
test/helper.rb
test/html/sax/test_parser.rb
test/html/sax/test_parser_context.rb
test/html/test_builder.rb
test/html/test_document.rb
test/html/test_document_encoding.rb
test/html/test_document_fragment.rb
test/html/test_element_description.rb
test/html/test_named_characters.rb
test/html/test_node.rb
test/html/test_node_encoding.rb
test/namespaces/test_additional_namespaces_in_builder_doc.rb
test/namespaces/test_namespaces_in_builder_doc.rb
test/namespaces/test_namespaces_in_created_doc.rb
test/namespaces/test_namespaces_in_parsed_doc.rb
test/test_convert_xpath.rb
test/test_css_cache.rb
test/test_encoding_handler.rb
test/test_memory_leak.rb
test/test_nokogiri.rb
test/test_reader.rb
test/test_soap4r_sax.rb
test/test_xslt_transforms.rb
test/xml/node/test_save_options.rb
test/xml/node/test_subclass.rb
test/xml/sax/test_parser.rb
test/xml/sax/test_parser_context.rb
test/xml/sax/test_push_parser.rb
test/xml/test_attr.rb
test/xml/test_attribute_decl.rb
test/xml/test_builder.rb
test/xml/test_c14n.rb
test/xml/test_cdata.rb
test/xml/test_comment.rb
test/xml/test_document.rb
test/xml/test_document_encoding.rb
test/xml/test_document_fragment.rb
test/xml/test_dtd.rb
test/xml/test_dtd_encoding.rb
test/xml/test_element_content.rb
test/xml/test_element_decl.rb
test/xml/test_entity_decl.rb
test/xml/test_entity_reference.rb
test/xml/test_namespace.rb
test/xml/test_node.rb
test/xml/test_node_attributes.rb
test/xml/test_node_encoding.rb
test/xml/test_node_inheritance.rb
test/xml/test_node_reparenting.rb
test/xml/test_node_set.rb
test/xml/test_parse_options.rb
test/xml/test_processing_instruction.rb
test/xml/test_reader_encoding.rb
test/xml/test_relax_ng.rb
test/xml/test_schema.rb
test/xml/test_syntax_error.rb
test/xml/test_text.rb
test/xml/test_unparented_node.rb
test/xml/test_xinclude.rb
test/xml/test_xpath.rb
test/xslt/test_custom_functions.rb
test/xslt/test_exception_handling.rb
test_all
nokogiri-1.6.1/tasks/ 0000755 0001750 0001750 00000000000 12261213762 014045 5 ustar boutil boutil nokogiri-1.6.1/tasks/cross_compile.rb 0000644 0001750 0001750 00000007453 12261213762 017244 0 ustar boutil boutil gem 'rake-compiler'
require 'rake/extensioncompiler'
HOST = Rake::ExtensionCompiler.mingw_host

require 'resolv'
require 'yaml' # YAML.load_file is used below; do not rely on another file having loaded it
require 'mini_portile'

# Library versions are read from dependencies.yml, keyed by library name.
dependencies = YAML.load_file("dependencies.yml")

# One MiniPortile recipe per native library, shared with the cross:* tasks
# through the $recipes global.
$recipes = {}
%w[zlib libiconv libxml2 libxslt].each do |lib|
  $recipes[lib] = MiniPortile.new lib, dependencies[lib]
end

# Every recipe is built for the mingw cross-compilation host.
$recipes.each { |_, recipe| recipe.host = HOST }
# Writes a one-line loader whose require path embeds RUBY_VERSION with its
# last numeric component removed; the version is computed when the generated
# file is loaded, not when this task runs.
file "lib/nokogiri/nokogiri.rb" do
  loader = %Q{require "nokogiri/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/nokogiri"\n}
  File.open("lib/nokogiri/nokogiri.rb", 'wb') { |io| io.write loader }
end
namespace :cross do
task :zlib do
  zlib = $recipes["zlib"]
  zlib.files = ["http://zlib.net/#{zlib.name}-#{zlib.version}.tar.gz"]

  # The build steps are overridden on this one recipe object so that zlib is
  # built from win32/Makefile.gcc.
  # NOTE(review): presumably recipe.cook calls these hooks -- confirm against mini_portile.
  class << zlib
    # Rewrites win32/Makefile.gcc in place: prepends the install paths and
    # fills in the empty PREFIX line with the cross-toolchain prefix.
    def configure
      Dir.chdir(work_path) do
        makefile = 'win32/Makefile.gcc'
        original = File.read(makefile)
        File.open(makefile, 'wb') do |out|
          out.puts "BINARY_PATH = #{CROSS_DIR}/bin"
          out.puts "LIBRARY_PATH = #{CROSS_DIR}/lib"
          out.puts "INCLUDE_PATH = #{CROSS_DIR}/include"
          out.puts original.sub(/^PREFIX\s*=\s*$/, "PREFIX = #{HOST}-")
        end
      end
    end

    # True once the makefile contains a BINARY_PATH line, i.e. configure has run.
    def configured?
      Dir.chdir(work_path) do
        !(File.read('win32/Makefile.gcc') =~ /^BINARY_PATH/).nil?
      end
    end

    def compile
      execute("compile", "make -f win32/Makefile.gcc")
    end

    def install
      execute("install", "make -f win32/Makefile.gcc install")
    end
  end

  # The marker file records a completed build so later runs skip cooking.
  marker = "#{CROSS_DIR}/#{zlib.name}-#{zlib.version}-#{zlib.host}.installed"
  unless File.exist?(marker)
    zlib.cook
    touch marker
  end

  zlib.activate
end
task :libiconv do
  iconv = $recipes["libiconv"]
  iconv.files = ["http://ftp.gnu.org/pub/gnu/libiconv/#{iconv.name}-#{iconv.version}.tar.gz"]

  # Static-only build for the cross host, followed by the compiler/linker flags.
  linkage = ["--host=#{HOST}", "--enable-static", "--disable-shared"]
  flags = [
    "CPPFLAGS='-mno-cygwin -Wall'",
    "CFLAGS='-mno-cygwin -O2 -g'",
    "CXXFLAGS='-mno-cygwin -O2 -g'",
    "LDFLAGS=-mno-cygwin"
  ]
  iconv.configure_options = linkage + flags

  # The marker file records a completed build so later runs skip cooking.
  marker = "#{CROSS_DIR}/#{iconv.name}-#{iconv.version}-#{iconv.host}.installed"
  unless File.exist?(marker)
    iconv.cook
    touch marker
  end

  iconv.activate
end
task :libxml2 => ["cross:zlib", "cross:libiconv"] do
  xml2 = $recipes["libxml2"]
  xml2.files = ["ftp://ftp.xmlsoft.org/libxml2/#{xml2.name}-#{xml2.version}.tar.gz"]

  # Static-only build for the cross host, pointed at the zlib and libiconv
  # produced by the prerequisite tasks.
  linkage = ["--host=#{HOST}", "--enable-static", "--disable-shared"]
  features = [
    "--with-zlib=#{CROSS_DIR}",
    "--with-iconv=#{$recipes["libiconv"].path}",
    "--without-python",
    "--without-readline",
    "CFLAGS='-DIN_LIBXML'"
  ]
  xml2.configure_options = linkage + features

  # The marker file records a completed build so later runs skip cooking.
  marker = "#{CROSS_DIR}/#{xml2.name}-#{xml2.version}-#{xml2.host}.installed"
  unless File.exist?(marker)
    xml2.cook
    touch marker
  end

  xml2.activate
end
task :libxslt => ['cross:libxml2'] do
  xslt = $recipes["libxslt"]
  xslt.files = ["ftp://ftp.xmlsoft.org/libxml2/#{xslt.name}-#{xslt.version}.tar.gz"]

  # Static-only build for the cross host, linked against the libxml2 recipe's
  # install path.
  linkage = ["--host=#{HOST}", "--enable-static", "--disable-shared"]
  features = [
    "--with-libxml-prefix=#{$recipes["libxml2"].path}",
    "--without-python",
    "--without-crypto",
    "CFLAGS='-DIN_LIBXML'"
  ]
  xslt.configure_options = linkage + features

  # The marker file records a completed build so later runs skip cooking.
  marker = "#{CROSS_DIR}/#{xslt.name}-#{xslt.version}-#{xslt.host}.installed"
  unless File.exist?(marker)
    xslt.cook
    touch marker
  end

  xslt.activate
end
task :file_list do
  # Add the generated loader and any per-Ruby-version compiled extensions
  # that exist on disk to the gem's file list.
  [
    "lib/nokogiri/nokogiri.rb",
    "lib/nokogiri/{1.9,2.0}/nokogiri.so"
  ].each do |pattern|
    HOE.spec.files += Dir[pattern]
  end
end
end
require 'rake/clean'

# `rake clobber` also removes the build markers and the per-host build trees.
[
  "#{CROSS_DIR}/*.installed",
  "#{CROSS_DIR}/#{HOST}",
  "tmp/#{HOST}"
].each { |pattern| CLOBBER.include(pattern) }

# Aggregate task: build the native libraries, generate the loader, then
# extend the gem file list.
task :cross => %w[cross:libxslt lib/nokogiri/nokogiri.rb cross:file_list]
nokogiri-1.6.1/tasks/test.rb 0000644 0001750 0001750 00000006126 12261213762 015356 0 ustar boutil boutil namespace :test do
desc "run test suite with aggressive GC"
task :gc => [:build] do
  # Set the flag in this process's environment before the test task runs.
  # NOTE(review): presumably the test helper reads NOKOGIRI_GC -- confirm.
  ENV.store('NOKOGIRI_GC', "true")
  Rake::Task["test"].invoke
end
desc "find call-seq in the rdoc"
task :rdoc_call_seq => 'docs' do
  # Pages whose path matches /\.src/ are skipped; every other generated page
  # that still contains a literal "call-seq" is reported.
  pages = Dir['doc/**/*.html'].reject { |path| path =~ /\.src/ }
  pages.each do |path|
    puts "FAIL: #{path}" if File.read(path) =~ /call-seq/
  end
end
desc "find all undocumented things"
task :rdoc => 'docs' do
  base = File.expand_path(File.join(File.dirname(__FILE__), '..', 'doc'))
  require 'test/unit'

  # Anonymous test case that receives one test method per generated HTML page.
  test = Class.new(Test::Unit::TestCase)

  Dir["#{base}/**/*.html"].each do |docfile|
    test_name = "test_#{docfile.sub("#{base}/", '').gsub(/[\/\.-]/, '_')}"

    # define_method with a closure replaces string eval: the path is never
    # spliced into Ruby source, so quotes, spaces or other odd characters in
    # a file name can no longer produce a syntax error or a wrong literal.
    test.class_eval do
      define_method(test_name) do
        assert_no_match(
          /Not documented/,
          File.read(docfile),
          "#{docfile} has undocumented things"
        )
      end
    end
  end
end
desc "Test against multiple versions of libxml2 (MULTIXML2_DIR=directory)"
task :multixml2 do
  MULTI_XML = File.join(ENV['HOME'], '.multixml2')

  # First run only: create the working tree and download every libxml2
  # tarball listed on the FTP server.
  unless File.exist?(MULTI_XML)
    %w{ versions install build }.each { |x|
      FileUtils.mkdir_p(File.join(MULTI_XML, x))
    }
    Dir.chdir File.join(MULTI_XML, 'versions') do
      require 'net/ftp'
      puts "Contacting xmlsoft.org ..."
      # Block form closes the connection even if a download raises.
      Net::FTP.open('xmlsoft.org') do |ftp|
        ftp.login('anonymous', 'anonymous')
        ftp.chdir('libxml2')
        ftp.list('libxml2-2.*.tar.gz').each do |x|
          # The file name is the last whitespace-delimited token of the listing line.
          file = x[/[^\s]*$/]
          puts "Downloading #{file}"
          ftp.getbinaryfile(file)
        end
      end
    end
  end

  # Build any libxml2 versions in $HOME/.multixml2/versions that
  # haven't been built yet
  Dir[File.join(MULTI_XML, 'versions','*.tar.gz')].each do |f|
    filename = File.basename(f, '.tar.gz')
    install_dir = File.join(MULTI_XML, 'install', filename)
    next if File.exist?(install_dir)
    Dir.chdir File.join(MULTI_XML, 'versions') do
      system "tar zxvf #{f} -C #{File.join(MULTI_XML, 'build')}"
    end
    Dir.chdir File.join(MULTI_XML, 'build', filename) do
      system "./configure --without-http --prefix=#{install_dir}"
      system "make && make install"
    end
  end

  test_results = {}
  libxslt = Dir[File.join(MULTI_XML, 'install', 'libxslt*')].first

  # MULTIXML2_DIR restricts the run to a single install directory; otherwise
  # every installed version is tried, in reverse sorted order.
  directories = ENV['MULTIXML2_DIR'] ? [ENV['MULTIXML2_DIR']] : Dir[File.join(MULTI_XML, 'install', '*')]
  directories.sort.reverse_each do |xml2_version|
    next unless xml2_version =~ /libxml2/
    extopts = "--with-xml2-include=#{xml2_version}/include/libxml2 --with-xml2-lib=#{xml2_version}/lib --with-xslt-dir=#{libxslt} --with-iconv-dir=/usr"
    # Re-invoke this same program ($0) with a clean build against the selected libxml2.
    cmd = "#{$0} clean test EXTOPTS='#{extopts}' LD_LIBRARY_PATH='#{xml2_version}/lib'"
    version = File.basename(xml2_version)
    result = system(cmd)
    test_results[version] = {
      :result => result,
      :cmd => cmd
    }
  end

  # Summary: one PASS/FAIL line per version, plus the repro command for failures.
  test_results.sort_by { |k,v| k }.each do |k,v|
    passed = v[:result]
    puts "#{k}: #{passed ? 'PASS' : 'FAIL'}"
    puts "repro: #{v[:cmd]}" unless passed
  end
end
end
nokogiri-1.6.1/tasks/nokogiri.org.rb 0000644 0001750 0001750 00000001415 12261213762 017002 0 ustar boutil boutil #
# note that this file will only work if you've got the `nokogiri.org`
# repo checked out, and you've got an rvm gemset "1.8.7@nokogiri"
# bundled with both nokogiri's and nokogiri.org's gems.
#
namespace :docs do
  desc "generate HTML docs for nokogiri.org"
  task :website do
    # NOTE(review): Kernel#system runs this in a child process, so it likely
    # does not switch the Ruby that executes the rest of this task -- confirm.
    system 'rvm use 1.8.7@nokogiri' # see above

    title = "#{HOE.name}-#{HOE.version} Documentation"

    # RDoc command-line arguments: fixed flags first, then the require paths
    # and extra rdoc files taken from the gem specification.
    options = [
      "--main=#{HOE.readme_file}",
      '--format=activerecord',
      '--threads=1',
      "--title=#{title.inspect}"
    ]
    options.concat(HOE.spec.require_paths)
    options.concat(HOE.spec.extra_rdoc_files)

    require 'rdoc/rdoc'
    # Default RAILS_ROOT to a sibling nokogiri_ws directory unless already set.
    ENV['RAILS_ROOT'] ||= File.expand_path(File.join('..', 'nokogiri_ws'))
    RDoc::RDoc.new.document options
  end
end
nokogiri-1.6.1/dependencies.yml 0000644 0001750 0001750 00000000104 12261213762 016064 0 ustar boutil boutil libxml2: "2.8.0"
libxslt: "1.1.26"
zlib: "1.2.7"
libiconv: "1.13.1"
nokogiri-1.6.1/ports/ 0000755 0001750 0001750 00000000000 12261213762 014067 5 ustar boutil boutil nokogiri-1.6.1/ext/ 0000755 0001750 0001750 00000000000 12261213762 013520 5 ustar boutil boutil nokogiri-1.6.1/ext/nokogiri/ 0000755 0001750 0001750 00000000000 12261213762 015341 5 ustar boutil boutil nokogiri-1.6.1/ext/nokogiri/html_element_description.c 0000644 0001750 0001750 00000013414 12261213762 022570 0 ustar boutil boutil #include
/*
 * call-seq:
 *  required_attributes
 *
 * A list of required attributes for this element
 */
static VALUE required_attributes(VALUE self)
{
  htmlElemDesc * description;
  VALUE list;
  int i;

  Data_Get_Struct(self, htmlElemDesc, description);

  list = rb_ary_new();

  /* No required-attribute table for this element: return the empty array. */
  if(NULL == description->attrs_req) return list;

  /*
   * Iterate attrs_req up to its own NULL entry.  The loop condition used to
   * test attrs_depr[i], which could stop early, read past the end of
   * attrs_req, or dereference a NULL attrs_depr table.
   */
  for(i = 0; description->attrs_req[i]; i++) {
    rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
  }

  return list;
}
/*
 * call-seq:
 *  deprecated_attributes
 *
 * A list of deprecated attributes for this element
 */
static VALUE deprecated_attributes(VALUE self)
{
  htmlElemDesc * elem;
  VALUE names;
  int idx = 0;

  Data_Get_Struct(self, htmlElemDesc, elem);
  names = rb_ary_new();

  /* A NULL table yields an empty array; otherwise copy entries up to the NULL entry. */
  if(elem->attrs_depr) {
    while(elem->attrs_depr[idx]) {
      rb_ary_push(names, NOKOGIRI_STR_NEW2(elem->attrs_depr[idx]));
      idx++;
    }
  }

  return names;
}
/*
 * call-seq:
 *  optional_attributes
 *
 * A list of optional attributes for this element
 */
static VALUE optional_attributes(VALUE self)
{
  htmlElemDesc * elem;
  VALUE names;
  int idx = 0;

  Data_Get_Struct(self, htmlElemDesc, elem);
  names = rb_ary_new();

  /* A NULL table yields an empty array; otherwise copy entries up to the NULL entry. */
  if(elem->attrs_opt) {
    while(elem->attrs_opt[idx]) {
      rb_ary_push(names, NOKOGIRI_STR_NEW2(elem->attrs_opt[idx]));
      idx++;
    }
  }

  return names;
}
/*
 * call-seq:
 *  default_sub_element
 *
 * The default sub element for this element
 */
static VALUE default_sub_element(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* nil when the element description has no default sub element. */
  if(!elem->defaultsubelt) return Qnil;

  return NOKOGIRI_STR_NEW2(elem->defaultsubelt);
}
/*
 * call-seq:
 *  sub_elements
 *
 * A list of allowed sub elements for this element.
 */
static VALUE sub_elements(VALUE self)
{
  htmlElemDesc * elem;
  VALUE names;
  int idx = 0;

  Data_Get_Struct(self, htmlElemDesc, elem);
  names = rb_ary_new();

  /* A NULL table yields an empty array; otherwise copy entries up to the NULL entry. */
  if(elem->subelts) {
    while(elem->subelts[idx]) {
      rb_ary_push(names, NOKOGIRI_STR_NEW2(elem->subelts[idx]));
      idx++;
    }
  }

  return names;
}
/*
 * call-seq:
 *  description
 *
 * The description for this element
 */
static VALUE description(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* Unlike name(), the desc field is passed on without a NULL check. */
  return NOKOGIRI_STR_NEW2(elem->desc);
}
/*
 * call-seq:
 *  inline?
 *
 * Is this element an inline element?
 */
static VALUE inline_eh(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* Map the C flag onto a Ruby boolean. */
  return elem->isinline ? Qtrue : Qfalse;
}
/*
 * call-seq:
 *  deprecated?
 *
 * Is this element deprecated?
 */
static VALUE deprecated_eh(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* Map the C flag onto a Ruby boolean. */
  return elem->depr ? Qtrue : Qfalse;
}
/*
 * call-seq:
 *  empty?
 *
 * Is this an empty element?
 */
static VALUE empty_eh(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* Map the C flag onto a Ruby boolean. */
  return elem->empty ? Qtrue : Qfalse;
}
/*
 * call-seq:
 *  save_end_tag?
 *
 * Should the end tag be saved?
 */
static VALUE save_end_tag_eh(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* Map the C flag onto a Ruby boolean. */
  return elem->saveEndTag ? Qtrue : Qfalse;
}
/*
 * call-seq:
 *  implied_end_tag?
 *
 * Can the end tag be implied for this tag?
 */
static VALUE implied_end_tag_eh(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* Map the endTag field onto a Ruby boolean. */
  return elem->endTag ? Qtrue : Qfalse;
}
/*
 * call-seq:
 *  implied_start_tag?
 *
 * Can the start tag be implied for this tag?
 */
static VALUE implied_start_tag_eh(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* Map the startTag field onto a Ruby boolean. */
  return elem->startTag ? Qtrue : Qfalse;
}
/*
 * call-seq:
 *  name
 *
 * Get the tag name for this ElementDescription
 */
static VALUE name(VALUE self)
{
  htmlElemDesc * elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  /* nil when the description carries no name. */
  return elem->name ? NOKOGIRI_STR_NEW2(elem->name) : Qnil;
}
/*
 * call-seq:
 *  [](tag_name)
 *
 * Get ElementDescription for +tag_name+
 */
static VALUE get_description(VALUE klass, VALUE tag_name)
{
  const xmlChar * tag = (const xmlChar *)StringValuePtr(tag_name);
  const htmlElemDesc * found = htmlTagLookup(tag);

  /* Unknown tag: nil rather than a wrapper object. */
  if(!found) return Qnil;

  /* Wrapped with no mark and no free function (both 0). */
  return Data_Wrap_Struct(klass, 0, 0, (void *)found);
}
VALUE cNokogiriHtmlElementDescription ;

/*
 * Defines Nokogiri::HTML::ElementDescription, stores the class in
 * cNokogiriHtmlElementDescription and binds the C functions in this file to
 * its Ruby methods.
 */
void init_html_element_description()
{
  VALUE nokogiri_mod = rb_define_module("Nokogiri");
  VALUE html_mod     = rb_define_module_under(nokogiri_mod, "HTML");
  VALUE desc_class   = rb_define_class_under(html_mod, "ElementDescription",rb_cObject);

  cNokogiriHtmlElementDescription = desc_class;

  /* Class-level lookup: ElementDescription[tag_name] */
  rb_define_singleton_method(desc_class, "[]", get_description, 1);

  /* Instance readers, all taking no arguments. */
  rb_define_method(desc_class, "name", name, 0);
  rb_define_method(desc_class, "implied_start_tag?", implied_start_tag_eh, 0);
  rb_define_method(desc_class, "implied_end_tag?", implied_end_tag_eh, 0);
  rb_define_method(desc_class, "save_end_tag?", save_end_tag_eh, 0);
  rb_define_method(desc_class, "empty?", empty_eh, 0);
  rb_define_method(desc_class, "deprecated?", deprecated_eh, 0);
  rb_define_method(desc_class, "inline?", inline_eh, 0);
  rb_define_method(desc_class, "description", description, 0);
  rb_define_method(desc_class, "sub_elements", sub_elements, 0);
  rb_define_method(desc_class, "default_sub_element", default_sub_element, 0);
  rb_define_method(desc_class, "optional_attributes", optional_attributes, 0);
  rb_define_method(desc_class, "deprecated_attributes", deprecated_attributes, 0);
  rb_define_method(desc_class, "required_attributes", required_attributes, 0);
}
nokogiri-1.6.1/ext/nokogiri/html_entity_lookup.c 0000644 0001750 0001750 00000001454 12261213762 021442 0 ustar boutil boutil #include
/*
 * call-seq:
 *  get(key)
 *
 * Get the HTML::EntityDescription for +key+, or nil when
 * htmlEntityLookup finds no entity with that name.
 */
static VALUE get(VALUE self, VALUE key)
{
  VALUE entity_class;
  VALUE ctor_args[3];
  const htmlEntityDesc * entity =
    htmlEntityLookup((const xmlChar *)StringValuePtr(key));

  if(!entity) return Qnil;

  entity_class = rb_const_get(mNokogiriHtml, rb_intern("EntityDescription"));

  /* EntityDescription.new(value, name, description) */
  ctor_args[0] = INT2NUM((long)entity->value);
  ctor_args[1] = NOKOGIRI_STR_NEW2(entity->name);
  ctor_args[2] = NOKOGIRI_STR_NEW2(entity->desc);

  return rb_class_new_instance(3, ctor_args, entity_class);
}
/*
 * Defines Nokogiri::HTML::EntityLookup and its #get method.
 */
void init_html_entity_lookup()
{
  VALUE nokogiri = rb_define_module("Nokogiri");
  VALUE html = rb_define_module_under(nokogiri, "HTML");
  VALUE lookup = rb_define_class_under(html, "EntityLookup", rb_cObject);

  rb_define_method(lookup, "get", get, 1);
}
nokogiri-1.6.1/ext/nokogiri/xml_document.h 0000644 0001750 0001750 00000001301 12261213762 020203 0 ustar boutil boutil #ifndef NOKOGIRI_XML_DOCUMENT
#define NOKOGIRI_XML_DOCUMENT
#include
/*
 * Bookkeeping record reached through a document's _private pointer;
 * the DOC_* macros in this header cast _private to this type.
 */
struct _nokogiriTuple {
/* Read back by DOC_RUBY_OBJECT */
VALUE doc;
/* Read back by DOC_UNLINKED_NODE_HASH */
st_table *unlinkedNodes;
/* Read back by DOC_NODE_CACHE */
VALUE node_cache;
};
typedef struct _nokogiriTuple nokogiriTuple;
typedef nokogiriTuple * nokogiriTuplePtr;
void init_xml_document();
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
/* x->_private viewed as a nokogiriTuplePtr, without dereferencing it.
 * NOTE(review): by its name presumably used to test that a tuple is
 * attached before the accessors below are applied - confirm at call sites. */
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
/* The `doc` member of the tuple stored in x->_private. */
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
/* The `unlinkedNodes` table of the tuple stored in x->_private. */
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
/* The `node_cache` member of the tuple stored in x->_private. */
#define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache)
extern VALUE cNokogiriXmlDocument ;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_libxml2_hacks.h 0000644 0001750 0001750 00000000402 12261213762 021110 0 ustar boutil boutil #ifndef HAVE_XMLFIRSTELEMENTCHILD
#ifndef XML_LIBXML2_HACKS
#define XML_LIBXML2_HACKS
xmlNodePtr xmlFirstElementChild(xmlNodePtr parent);
xmlNodePtr xmlNextElementSibling(xmlNodePtr node);
xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
#endif
#endif
nokogiri-1.6.1/ext/nokogiri/xml_comment.h 0000644 0001750 0001750 00000000225 12261213762 020033 0 ustar boutil boutil #ifndef NOKOGIRI_XML_COMMENT
#define NOKOGIRI_XML_COMMENT
#include
void init_xml_comment();
extern VALUE cNokogiriXmlComment;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_entity_reference.c 0000644 0001750 0001750 00000002244 12261213762 021721 0 ustar boutil boutil #include
/*
 * call-seq:
 *  new(document, name)
 *
 * Create a new EntityReference element on the +document+ with +name+.
 * Any further arguments are forwarded to #initialize; when a block is
 * given the new node is yielded to it.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE document, name, rest;
  VALUE wrapped;
  xmlDocPtr c_doc;
  xmlNodePtr reference;

  rb_scan_args(argc, argv, "2*", &document, &name, &rest);
  Data_Get_Struct(document, xmlDoc, c_doc);

  reference = xmlNewReference(c_doc, (const xmlChar *)StringValuePtr(name));
  nokogiri_root_node(reference);

  wrapped = Nokogiri_wrap_xml_node(klass, reference);
  rb_obj_call_init(wrapped, argc, argv);

  if(rb_block_given_p()) {
    rb_yield(wrapped);
  }

  return wrapped;
}
VALUE cNokogiriXmlEntityReference;

/* Defines Nokogiri::XML::EntityReference (a Node subclass) and its .new */
void init_xml_entity_reference()
{
  VALUE nokogiri = rb_define_module("Nokogiri");
  VALUE xml = rb_define_module_under(nokogiri, "XML");
  VALUE node = rb_define_class_under(xml, "Node", rb_cObject);

  /*
   * EntityReference represents an EntityReference node in an xml document.
   */
  cNokogiriXmlEntityReference = rb_define_class_under(xml, "EntityReference", node);

  rb_define_singleton_method(cNokogiriXmlEntityReference, "new", new, -1);
}
nokogiri-1.6.1/ext/nokogiri/xml_relax_ng.c 0000644 0001750 0001750 00000007114 12261213762 020167 0 ustar boutil boutil #include
/*
 * Free callback passed to Data_Wrap_Struct for RelaxNG schema objects:
 * releases the wrapped xmlRelaxNG with xmlRelaxNGFree.
 */
static void dealloc(xmlRelaxNGPtr schema)
{
/* NOKOGIRI_DEBUG_START/END are defined outside this file -
 * NOTE(review): presumably tracing hooks around the free; confirm. */
NOKOGIRI_DEBUG_START(schema);
xmlRelaxNGFree(schema);
NOKOGIRI_DEBUG_END(schema);
}
/*
 * call-seq:
 *  validate_document(document)
 *
 * Validate a Nokogiri::XML::Document against this RelaxNG schema and
 * return the array that collects the reported errors.
 */
static VALUE validate_document(VALUE self, VALUE document)
{
  xmlRelaxNGPtr c_schema;
  xmlDocPtr c_doc;
  xmlRelaxNGValidCtxtPtr validator;
  VALUE error_list;

  Data_Get_Struct(self, xmlRelaxNG, c_schema);
  Data_Get_Struct(document, xmlDoc, c_doc);

  error_list = rb_ary_new();

  validator = xmlRelaxNGNewValidCtxt(c_schema);
  if(!validator)
    rb_raise(rb_eRuntimeError, "Could not create a validation context");

  /* Only hooked up when the build detected the structured-error setter */
#ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
  xmlRelaxNGSetValidStructuredErrors(validator,
                                     Nokogiri_error_array_pusher,
                                     (void *)error_list);
#endif

  xmlRelaxNGValidateDoc(validator, c_doc);
  xmlRelaxNGFreeValidCtxt(validator);

  return error_list;
}
/*
 * call-seq:
 *  read_memory(string)
 *
 * Create a new RelaxNG from the contents of +string+. Errors collected
 * while parsing are stored in the schema's @errors; an exception is
 * raised when no schema could be built.
 */
static VALUE read_memory(VALUE klass, VALUE content)
{
  xmlRelaxNGParserCtxtPtr parser = xmlRelaxNGNewMemParserCtxt(
      (const char *)StringValuePtr(content),
      (int)RSTRING_LEN(content)
  );
  VALUE error_list = rb_ary_new();
  VALUE wrapped;
  xmlRelaxNGPtr c_schema;

  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
  xmlRelaxNGSetParserStructuredErrors(parser,
                                      Nokogiri_error_array_pusher,
                                      (void *)error_list);
#endif

  c_schema = xmlRelaxNGParse(parser);

  /* Restore the default handler and drop the parser before reporting */
  xmlSetStructuredErrorFunc(NULL, NULL);
  xmlRelaxNGFreeParserCtxt(parser);

  if(NULL == c_schema) {
    xmlErrorPtr last_error = xmlGetLastError();

    if(!last_error)
      rb_raise(rb_eRuntimeError, "Could not parse document");

    Nokogiri_error_raise(NULL, last_error);
    return Qnil;
  }

  wrapped = Data_Wrap_Struct(klass, 0, dealloc, c_schema);
  rb_iv_set(wrapped, "@errors", error_list);

  return wrapped;
}
/*
 * call-seq:
 *  from_document(doc)
 *
 * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+.
 * Errors collected while parsing are stored in the schema's @errors; an
 * exception is raised when no schema could be built.
 */
static VALUE from_document(VALUE klass, VALUE document)
{
  xmlDocPtr doc;
  xmlRelaxNGParserCtxtPtr ctx;
  xmlRelaxNGPtr schema;
  VALUE errors;
  VALUE rb_schema;

  Data_Get_Struct(document, xmlDoc, doc);

  /* In case someone passes us a node. ugh. */
  doc = doc->doc;

  ctx = xmlRelaxNGNewDocParserCtxt(doc);

  errors = rb_ary_new();
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
  xmlRelaxNGSetParserStructuredErrors(
    ctx,
    Nokogiri_error_array_pusher,
    (void *)errors
  );
#endif

  schema = xmlRelaxNGParse(ctx);

  xmlSetStructuredErrorFunc(NULL, NULL);
  /* Release the parser context on every path (it was previously leaked),
   * matching read_memory. */
  xmlRelaxNGFreeParserCtxt(ctx);

  if(NULL == schema) {
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      Nokogiri_error_raise(NULL, error);
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;
  }

  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
  rb_iv_set(rb_schema, "@errors", errors);

  return rb_schema;
}
VALUE cNokogiriXmlRelaxNG;

/*
 * Defines Nokogiri::XML::RelaxNG as a subclass of cNokogiriXmlSchema and
 * registers its constructors and the private validation hook.
 */
void init_xml_relax_ng()
{
  VALUE nokogiri = rb_define_module("Nokogiri");
  VALUE xml = rb_define_module_under(nokogiri, "XML");

  cNokogiriXmlRelaxNG = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);

  rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, 1);
  rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, 1);
  rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1);
}
nokogiri-1.6.1/ext/nokogiri/xml_entity_reference.h 0000644 0001750 0001750 00000000270 12261213762 021723 0 ustar boutil boutil #ifndef NOKOGIRI_XML_ENTITY_REFERENCE
#define NOKOGIRI_XML_ENTITY_REFERENCE
#include
void init_xml_entity_reference();
extern VALUE cNokogiriXmlEntityReference;
#endif
nokogiri-1.6.1/ext/nokogiri/html_document.c 0000644 0001750 0001750 00000010650 12261213762 020351 0 ustar boutil boutil #include
static ID id_encoding_found;
/*
 * call-seq:
 *  new
 *
 * Create a new document. The first two optional arguments, when truthy,
 * are passed to htmlNewDoc as the URI and the external ID; all arguments
 * are forwarded to #initialize.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE rest, uri, external_id, rb_doc;
  const xmlChar * c_uri = NULL;
  const xmlChar * c_external_id = NULL;
  htmlDocPtr doc;

  rb_scan_args(argc, argv, "0*", &rest);
  uri         = rb_ary_entry(rest, (long)0);
  external_id = rb_ary_entry(rest, (long)1);

  if(RTEST(uri))
    c_uri = (const xmlChar *)StringValuePtr(uri);
  if(RTEST(external_id))
    c_external_id = (const xmlChar *)StringValuePtr(external_id);

  doc = htmlNewDoc(c_uri, c_external_id);

  rb_doc = Nokogiri_wrap_xml_document(klass, doc);
  rb_obj_call_init(rb_doc, argc, argv);

  return rb_doc;
}
/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Read the HTML document from +io+ with given +url+, +encoding+,
 * and +options+. See Nokogiri::HTML.parse
 *
 * Errors reported while parsing are stored in the document's @errors.
 * Raises when no document could be produced, or re-raises the exception
 * returned by io.encoding_found when +io+ responds to it.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
{
  const char * c_url = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  /* Convert before the global error handler is installed: NUM2INT can
   * raise, which would otherwise leave the handler pointing at error_list. */
  int c_options = (int)NUM2INT(options);
  VALUE error_list = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadIO(
      io_read_callback,
      io_close_callback,
      (void *)io,
      c_url,
      c_enc,
      c_options
  );
  xmlSetStructuredErrorFunc(NULL, NULL);

  /*
   * If EncodingFound has occurred in EncodingReader, make sure to do
   * a cleanup and propagate the error.
   */
  if (rb_respond_to(io, id_encoding_found)) {
    VALUE encoding_found = rb_funcall(io, id_encoding_found, 0);
    if (!NIL_P(encoding_found)) {
      xmlFreeDoc(doc);
      rb_exc_raise(encoding_found);
    }
  }

  if(doc == NULL) {
    /* Nothing to free here: doc is NULL on this path. */
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
/*
 * call-seq:
 *  read_memory(string, url, encoding, options)
 *
 * Read the HTML document contained in +string+ with given +url+, +encoding+,
 * and +options+. See Nokogiri::HTML.parse
 *
 * Errors reported while parsing are stored in the document's @errors.
 * Raises when no document could be produced.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
  const char * c_buffer = StringValuePtr(string);
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  int len               = (int)RSTRING_LEN(string);
  /* Convert before the global error handler is installed: NUM2INT can
   * raise, which would otherwise leave the handler pointing at error_list. */
  int c_options         = (int)NUM2INT(options);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadMemory(c_buffer, len, c_url, c_enc, c_options);
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* Nothing to free here: doc is NULL on this path. */
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
/*
 * call-seq:
 *  type
 *
 * The type for this document, as the integer stored in the underlying
 * document's type field.
 */
static VALUE type(VALUE self)
{
  htmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  return INT2NUM((long)c_doc->type);
}
VALUE cNokogiriHtmlDocument ;

/*
 * Defines Nokogiri::HTML::Document (a subclass of XML::Document),
 * registers its methods and caches the encoding_found method id.
 */
void init_html_document()
{
  VALUE nokogiri = rb_define_module("Nokogiri");
  VALUE html = rb_define_module_under(nokogiri, "HTML");
  VALUE xml = rb_define_module_under(nokogiri, "XML");
  VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
  VALUE xml_doc = rb_define_class_under(xml, "Document", node);

  cNokogiriHtmlDocument = rb_define_class_under(html, "Document", xml_doc);

  rb_define_singleton_method(cNokogiriHtmlDocument, "read_memory", read_memory, 4);
  rb_define_singleton_method(cNokogiriHtmlDocument, "read_io", read_io, 4);
  rb_define_singleton_method(cNokogiriHtmlDocument, "new", new, -1);

  rb_define_method(cNokogiriHtmlDocument, "type", type, 0);

  id_encoding_found = rb_intern("encoding_found");
}
nokogiri-1.6.1/ext/nokogiri/xml_element_decl.h 0000644 0001750 0001750 00000000250 12261213762 021007 0 ustar boutil boutil #ifndef NOKOGIRI_XML_ELEMENT_DECL
#define NOKOGIRI_XML_ELEMENT_DECL
#include
void init_xml_element_decl();
extern VALUE cNokogiriXmlElementDecl;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_libxml2_hacks.c 0000644 0001750 0001750 00000005633 12261213762 021116 0 ustar boutil boutil #ifndef HAVE_XMLFIRSTELEMENTCHILD
#include
/**
 * xmlFirstElementChild:
 * @parent: the parent node
 *
 * Finds the first child node of that element which is a Element node
 * Note the handling of entities references is different than in
 * the W3C DOM element traversal spec since we don't have back reference
 * from entities content to entities references.
 *
 * Returns the first element child or NULL if not available
 */
xmlNodePtr
xmlFirstElementChild(xmlNodePtr parent) {
    xmlNodePtr child;

    if (parent == NULL)
        return(NULL);

    /* Only these node kinds are searched for element children. */
    if (parent->type != XML_ELEMENT_NODE &&
        parent->type != XML_ENTITY_NODE &&
        parent->type != XML_DOCUMENT_NODE &&
        parent->type != XML_HTML_DOCUMENT_NODE)
        return(NULL);

    /* Walk forward from the first child until an element is found. */
    for (child = parent->children; child != NULL; child = child->next) {
        if (child->type == XML_ELEMENT_NODE)
            return(child);
    }
    return(NULL);
}
/**
 * xmlNextElementSibling:
 * @node: the current node
 *
 * Finds the first closest next sibling of the node which is an
 * element node.
 * Note the handling of entities references is different than in
 * the W3C DOM element traversal spec since we don't have back reference
 * from entities content to entities references.
 *
 * Returns the next element sibling or NULL if not available
 */
xmlNodePtr
xmlNextElementSibling(xmlNodePtr node) {
    xmlNodePtr sibling;

    if (node == NULL)
        return(NULL);

    /* Node kinds whose following siblings are searched; any other
     * kind yields NULL. */
    switch (node->type) {
        case XML_ELEMENT_NODE:
        case XML_TEXT_NODE:
        case XML_CDATA_SECTION_NODE:
        case XML_ENTITY_REF_NODE:
        case XML_ENTITY_NODE:
        case XML_PI_NODE:
        case XML_COMMENT_NODE:
        case XML_DTD_NODE:
        case XML_XINCLUDE_START:
        case XML_XINCLUDE_END:
            break;
        default:
            return(NULL);
    }

    for (sibling = node->next; sibling != NULL; sibling = sibling->next) {
        if (sibling->type == XML_ELEMENT_NODE)
            return(sibling);
    }
    return(NULL);
}
/**
 * xmlLastElementChild:
 * @parent: the parent node
 *
 * Finds the last child node of that element which is a Element node
 * Note the handling of entities references is different than in
 * the W3C DOM element traversal spec since we don't have back reference
 * from entities content to entities references.
 *
 * Returns the last element child or NULL if not available
 */
xmlNodePtr
xmlLastElementChild(xmlNodePtr parent) {
    xmlNodePtr child;

    if (parent == NULL)
        return(NULL);

    /* Only these node kinds are searched for element children. */
    if (parent->type != XML_ELEMENT_NODE &&
        parent->type != XML_ENTITY_NODE &&
        parent->type != XML_DOCUMENT_NODE &&
        parent->type != XML_HTML_DOCUMENT_NODE)
        return(NULL);

    /* Walk backwards from the last child until an element is found. */
    for (child = parent->last; child != NULL; child = child->prev) {
        if (child->type == XML_ELEMENT_NODE)
            return(child);
    }
    return(NULL);
}
#endif
nokogiri-1.6.1/ext/nokogiri/xml_processing_instruction.h 0000644 0001750 0001750 00000000320 12261213762 023202 0 ustar boutil boutil #ifndef NOKOGIRI_XML_PROCESSING_INSTRUCTION
#define NOKOGIRI_XML_PROCESSING_INSTRUCTION
#include
void init_xml_processing_instruction();
extern VALUE cNokogiriXmlProcessingInstruction;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_sax_parser_context.h 0000644 0001750 0001750 00000000300 12261213762 022276 0 ustar boutil boutil #ifndef NOKOGIRI_XML_SAX_PARSER_CONTEXT
#define NOKOGIRI_XML_SAX_PARSER_CONTEXT
#include
extern VALUE cNokogiriXmlSaxParserContext;
void init_xml_sax_parser_context();
#endif
nokogiri-1.6.1/ext/nokogiri/html_element_description.h 0000644 0001750 0001750 00000000312 12261213762 022566 0 ustar boutil boutil #ifndef NOKOGIRI_HTML_ELEMENT_DESCRIPTION
#define NOKOGIRI_HTML_ELEMENT_DESCRIPTION
#include
void init_html_element_description();
extern VALUE cNokogiriHtmlElementDescription ;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_node_set.h 0000644 0001750 0001750 00000000531 12261213762 020171 0 ustar boutil boutil #ifndef NOKOGIRI_XML_NODE_SET
#define NOKOGIRI_XML_NODE_SET
#include
void init_xml_node_set();
extern VALUE cNokogiriXmlNodeSet ;
VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) ;
/*
 * Pairs a libxml2 node set with an st_table named `namespaces`.
 * NOTE(review): the table's exact use is not visible in this header -
 * see the node set implementation.
 */
typedef struct _nokogiriNodeSetTuple {
/* The wrapped libxml2 node set */
xmlNodeSetPtr node_set;
/* Companion table; contents are defined by the node set implementation */
st_table *namespaces;
} nokogiriNodeSetTuple;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_attribute_decl.h 0000644 0001750 0001750 00000000260 12261213762 021362 0 ustar boutil boutil #ifndef NOKOGIRI_XML_ATTRIBUTE_DECL
#define NOKOGIRI_XML_ATTRIBUTE_DECL
#include
void init_xml_attribute_decl();
extern VALUE cNokogiriXmlAttributeDecl;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_node.h 0000644 0001750 0001750 00000000461 12261213762 017320 0 ustar boutil boutil #ifndef NOKOGIRI_XML_NODE
#define NOKOGIRI_XML_NODE
#include
void init_xml_node();
extern VALUE cNokogiriXmlNode ;
extern VALUE cNokogiriXmlElement ;
VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node) ;
void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_hash) ;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_sax_parser_context.c 0000644 0001750 0001750 00000011421 12261213762 022277 0 ustar boutil boutil #include
VALUE cNokogiriXmlSaxParserContext ;
/*
 * Free callback passed to Data_Wrap_Struct for parser contexts.
 * The sax pointer is cleared first so that xmlFreeParserCtxt does not
 * free a handler this context does not own (parse_with installs the
 * handler that belongs to the SAX parser object).
 */
static void deallocate(xmlParserCtxtPtr ctxt)
{
  /* Pass ctxt: the previous argument, `handler`, is not declared in this
   * function and only compiled when the debug macros ignore their argument. */
  NOKOGIRI_DEBUG_START(ctxt);

  ctxt->sax = NULL;
  xmlFreeParserCtxt(ctxt);

  NOKOGIRI_DEBUG_END(ctxt);
}
/*
 * call-seq:
 *  parse_io(io, encoding)
 *
 * Parse +io+ object with +encoding+
 *
 * Raises RuntimeError when libxml2 cannot create the parser context.
 */
static VALUE
parse_io(VALUE klass, VALUE io, VALUE encoding)
{
  xmlParserCtxtPtr ctxt;
  xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);

  ctxt = xmlCreateIOParserCtxt(NULL, NULL,
                               (xmlInputReadCallback)io_read_callback,
                               (xmlInputCloseCallback)io_close_callback,
                               (void *)io, enc);

  /* The creator returns NULL on failure; do not dereference it. */
  if (NULL == ctxt)
    rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");

  /* Drop the handler allocated with the context; parse_with installs
   * the real one. */
  if (ctxt->sax) {
    xmlFree(ctxt->sax);
    ctxt->sax = NULL;
  }

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
/*
 * call-seq:
 *  parse_file(filename)
 *
 * Parse file given +filename+
 *
 * Raises RuntimeError when libxml2 cannot create the parser context
 * (for example when the file cannot be opened).
 */
static VALUE parse_file(VALUE klass, VALUE filename)
{
  xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValuePtr(filename));

  /* A NULL context used to be wrapped as-is and crashed on first use. */
  if (NULL == ctxt)
    rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");

  /* Same treatment as the io/memory constructors: release the handler
   * allocated with the context, since deallocate clears the pointer
   * without freeing it. */
  if (ctxt->sax) {
    xmlFree(ctxt->sax);
    ctxt->sax = NULL;
  }

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
/*
 * call-seq:
 *  parse_memory(data)
 *
 * Parse the XML stored in memory in +data+
 *
 * Raises ArgumentError when +data+ is nil, TypeError when it cannot be
 * converted to a String, and RuntimeError when it is empty or when
 * libxml2 cannot create the parser context.
 */
static VALUE
parse_memory(VALUE klass, VALUE data)
{
  xmlParserCtxtPtr ctxt;

  if (NIL_P(data))
    rb_raise(rb_eArgError, "data cannot be nil");

  /* Make sure data really is a String before RSTRING_LEN touches it. */
  StringValue(data);

  if (!(int)RSTRING_LEN(data))
    rb_raise(rb_eRuntimeError, "data cannot be empty");

  ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
                                   (int)RSTRING_LEN(data));

  /* The creator returns NULL on failure; do not dereference it. */
  if (NULL == ctxt)
    rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");

  /* Drop the handler allocated with the context; parse_with installs
   * the real one. */
  if (ctxt->sax) {
    xmlFree(ctxt->sax);
    ctxt->sax = NULL;
  }

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
/*
 * Body half of the rb_ensure pair used by parse_with: runs the parser.
 * +ctxt_val+ is the xmlParserCtxtPtr smuggled through a VALUE.
 */
static VALUE
parse_doc(VALUE ctxt_val)
{
  xmlParseDocument((xmlParserCtxtPtr)ctxt_val);

  return Qnil;
}
/*
 * Ensure half of the rb_ensure pair used by parse_with: frees any
 * document attached to the context and destroys the SAX tuple stored in
 * userData.
 */
static VALUE
parse_doc_finalize(VALUE ctxt_val)
{
  xmlParserCtxtPtr parser_ctxt = (xmlParserCtxtPtr)ctxt_val;

  if (parser_ctxt->myDoc != NULL) {
    xmlFreeDoc(parser_ctxt->myDoc);
  }

  NOKOGIRI_SAX_TUPLE_DESTROY(parser_ctxt->userData);

  return Qnil;
}
/*
 * call-seq:
 *  parse_with(sax_handler)
 *
 * Use +sax_handler+ and parse the current document. Raises ArgumentError
 * unless +sax_handler+ is a Nokogiri::XML::SAX::Parser.
 */
static VALUE
parse_with(VALUE self, VALUE sax_handler)
{
  xmlParserCtxtPtr parser_ctxt;
  xmlSAXHandlerPtr handler;

  if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
    rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
  }

  Data_Get_Struct(self, xmlParserCtxt, parser_ctxt);
  Data_Get_Struct(sax_handler, xmlSAXHandler, handler);

  /* Free the sax handler since we'll assign our own */
  if (parser_ctxt->sax != NULL &&
      parser_ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
    xmlFree(parser_ctxt->sax);
  }

  parser_ctxt->sax = handler;
  parser_ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(parser_ctxt, sax_handler);

  /* parse_doc_finalize runs even when a callback raises */
  rb_ensure(parse_doc, (VALUE)parser_ctxt, parse_doc_finalize, (VALUE)parser_ctxt);

  return Qnil;
}
/*
 * call-seq:
 *  replace_entities=(boolean)
 *
 * Should this parser replace entities? When set to true, an ampersand
 * entity reference will get converted to a literal '&'. Only +false+
 * turns the option off; every other value turns it on.
 */
static VALUE set_replace_entities(VALUE self, VALUE value)
{
  xmlParserCtxtPtr parser_ctxt;

  Data_Get_Struct(self, xmlParserCtxt, parser_ctxt);

  parser_ctxt->replaceEntities = (Qfalse == value) ? 0 : 1;

  return value;
}
/*
 * call-seq:
 *  replace_entities
 *
 * Should this parser replace entities? When true, an ampersand entity
 * reference will get converted to a literal '&'.
 */
static VALUE get_replace_entities(VALUE self)
{
  xmlParserCtxtPtr parser_ctxt;

  Data_Get_Struct(self, xmlParserCtxt, parser_ctxt);

  return parser_ctxt->replaceEntities ? Qtrue : Qfalse;
}
/*
 * call-seq: line
 *
 * Get the current line the parser context is processing, or nil when
 * the context has no current input.
 */
static VALUE line(VALUE self)
{
  xmlParserCtxtPtr parser_ctxt;

  Data_Get_Struct(self, xmlParserCtxt, parser_ctxt);

  if(NULL == parser_ctxt->input)
    return Qnil;

  return INT2NUM(parser_ctxt->input->line);
}
/*
 * call-seq: column
 *
 * Get the current column the parser context is processing, or nil when
 * the context has no current input.
 */
static VALUE column(VALUE self)
{
  xmlParserCtxtPtr parser_ctxt;

  Data_Get_Struct(self, xmlParserCtxt, parser_ctxt);

  if(NULL == parser_ctxt->input)
    return Qnil;

  return INT2NUM(parser_ctxt->input->col);
}
/*
 * Defines Nokogiri::XML::SAX::ParserContext, stores the class in
 * cNokogiriXmlSaxParserContext and registers its methods.
 */
void init_xml_sax_parser_context()
{
  VALUE nokogiri = rb_define_module("Nokogiri");
  VALUE xml = rb_define_module_under(nokogiri, "XML");
  VALUE sax = rb_define_module_under(xml, "SAX");

  cNokogiriXmlSaxParserContext = rb_define_class_under(sax, "ParserContext", rb_cObject);

  /* Constructors */
  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);

  /* Instance methods */
  rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
  rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
  rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
  rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
  rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
}
nokogiri-1.6.1/ext/nokogiri/xml_element_content.h 0000644 0001750 0001750 00000000332 12261213762 021553 0 ustar boutil boutil #ifndef NOKOGIRI_XML_ELEMENT_CONTENT
#define NOKOGIRI_XML_ELEMENT_CONTENT
#include
VALUE Nokogiri_wrap_element_content(VALUE doc, xmlElementContentPtr element);
void init_xml_element_content();
#endif
nokogiri-1.6.1/ext/nokogiri/xml_entity_decl.c 0000644 0001750 0001750 00000005144 12261213762 020674 0 ustar boutil boutil #include
/*
 * call-seq:
 *  original_content
 *
 * Get the original_content before ref substitution, or nil when the
 * entity has none.
 */
static VALUE original_content(VALUE self)
{
  xmlEntityPtr entity;

  Data_Get_Struct(self, xmlEntity, entity);

  return entity->orig ? NOKOGIRI_STR_NEW2(entity->orig) : Qnil;
}
/*
 * call-seq:
 *  content
 *
 * Get the content, or nil when the entity has none. The string is built
 * from the entity's content pointer and its recorded length.
 */
static VALUE get_content(VALUE self)
{
  xmlEntityPtr entity;

  Data_Get_Struct(self, xmlEntity, entity);

  return entity->content
    ? NOKOGIRI_STR_NEW(entity->content, entity->length)
    : Qnil;
}
/*
 * call-seq:
 *  entity_type
 *
 * Get the entity type as an integer (the entity's etype field).
 */
static VALUE entity_type(VALUE self)
{
  xmlEntityPtr entity;

  Data_Get_Struct(self, xmlEntity, entity);

  return INT2NUM((int)entity->etype);
}
/*
 * call-seq:
 *  external_id
 *
 * Get the external identifier for PUBLIC, or nil when there is none.
 */
static VALUE external_id(VALUE self)
{
  xmlEntityPtr entity;

  Data_Get_Struct(self, xmlEntity, entity);

  return entity->ExternalID ? NOKOGIRI_STR_NEW2(entity->ExternalID) : Qnil;
}
/*
 * call-seq:
 *  system_id
 *
 * Get the URI for a SYSTEM or PUBLIC Entity, or nil when there is none.
 */
static VALUE system_id(VALUE self)
{
  xmlEntityPtr entity;

  Data_Get_Struct(self, xmlEntity, entity);

  return entity->SystemID ? NOKOGIRI_STR_NEW2(entity->SystemID) : Qnil;
}
VALUE cNokogiriXmlEntityDecl;

/*
 * Defines Nokogiri::XML::EntityDecl (a Node subclass), its readers and
 * one constant per libxml2 entity type.
 */
void init_xml_entity_decl()
{
  VALUE nokogiri = rb_define_module("Nokogiri");
  VALUE xml = rb_define_module_under(nokogiri, "XML");
  VALUE node = rb_define_class_under(xml, "Node", rb_cObject);

  cNokogiriXmlEntityDecl = rb_define_class_under(xml, "EntityDecl", node);

  rb_define_method(cNokogiriXmlEntityDecl, "original_content", original_content, 0);
  rb_define_method(cNokogiriXmlEntityDecl, "content", get_content, 0);
  rb_define_method(cNokogiriXmlEntityDecl, "entity_type", entity_type, 0);
  rb_define_method(cNokogiriXmlEntityDecl, "external_id", external_id, 0);
  rb_define_method(cNokogiriXmlEntityDecl, "system_id", system_id, 0);

  /* Entity type constants, mirroring libxml2's entity type values */
  rb_const_set(cNokogiriXmlEntityDecl,
               rb_intern("INTERNAL_GENERAL"),
               INT2NUM(XML_INTERNAL_GENERAL_ENTITY));
  rb_const_set(cNokogiriXmlEntityDecl,
               rb_intern("EXTERNAL_GENERAL_PARSED"),
               INT2NUM(XML_EXTERNAL_GENERAL_PARSED_ENTITY));
  rb_const_set(cNokogiriXmlEntityDecl,
               rb_intern("EXTERNAL_GENERAL_UNPARSED"),
               INT2NUM(XML_EXTERNAL_GENERAL_UNPARSED_ENTITY));
  rb_const_set(cNokogiriXmlEntityDecl,
               rb_intern("INTERNAL_PARAMETER"),
               INT2NUM(XML_INTERNAL_PARAMETER_ENTITY));
  rb_const_set(cNokogiriXmlEntityDecl,
               rb_intern("EXTERNAL_PARAMETER"),
               INT2NUM(XML_EXTERNAL_PARAMETER_ENTITY));
  rb_const_set(cNokogiriXmlEntityDecl,
               rb_intern("INTERNAL_PREDEFINED"),
               INT2NUM(XML_INTERNAL_PREDEFINED_ENTITY));
}
nokogiri-1.6.1/ext/nokogiri/xml_entity_decl.h 0000644 0001750 0001750 00000000245 12261213762 020676 0 ustar boutil boutil #ifndef NOKOGIRI_XML_ENTITY_DECL
#define NOKOGIRI_XML_ENTITY_DECL
#include
void init_xml_entity_decl();
extern VALUE cNokogiriXmlEntityDecl;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_schema.c 0000644 0001750 0001750 00000010721 12261213762 017626 0 ustar boutil boutil #include
/*
 * Free callback passed to Data_Wrap_Struct for Schema objects:
 * releases the wrapped xmlSchema with xmlSchemaFree.
 */
static void dealloc(xmlSchemaPtr schema)
{
/* NOKOGIRI_DEBUG_START/END are defined outside this file -
 * NOTE(review): presumably tracing hooks around the free; confirm. */
NOKOGIRI_DEBUG_START(schema);
xmlSchemaFree(schema);
NOKOGIRI_DEBUG_END(schema);
}
/*
 * call-seq:
 *  validate_document(document)
 *
 * Validate a Nokogiri::XML::Document against this Schema and return the
 * array that collects the reported errors.
 */
static VALUE validate_document(VALUE self, VALUE document)
{
  xmlSchemaPtr c_schema;
  xmlDocPtr c_doc;
  xmlSchemaValidCtxtPtr validator;
  VALUE error_list;

  Data_Get_Struct(self, xmlSchema, c_schema);
  Data_Get_Struct(document, xmlDoc, c_doc);

  error_list = rb_ary_new();

  validator = xmlSchemaNewValidCtxt(c_schema);
  if(!validator)
    rb_raise(rb_eRuntimeError, "Could not create a validation context");

  /* Only hooked up when the build detected the structured-error setter */
#ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS
  xmlSchemaSetValidStructuredErrors(validator,
                                    Nokogiri_error_array_pusher,
                                    (void *)error_list);
#endif

  xmlSchemaValidateDoc(validator, c_doc);
  xmlSchemaFreeValidCtxt(validator);

  return error_list;
}
/*
 * call-seq:
 *  validate_file(filename)
 *
 * Validate a file against this Schema and return the array that
 * collects the reported errors.
 */
static VALUE validate_file(VALUE self, VALUE rb_filename)
{
  xmlSchemaPtr c_schema;
  xmlSchemaValidCtxtPtr validator;
  const char * c_path;
  VALUE error_list;

  Data_Get_Struct(self, xmlSchema, c_schema);
  c_path = (const char *)StringValuePtr(rb_filename);

  error_list = rb_ary_new();

  validator = xmlSchemaNewValidCtxt(c_schema);
  if(!validator)
    rb_raise(rb_eRuntimeError, "Could not create a validation context");

  /* Only hooked up when the build detected the structured-error setter */
#ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS
  xmlSchemaSetValidStructuredErrors(validator,
                                    Nokogiri_error_array_pusher,
                                    (void *)error_list);
#endif

  xmlSchemaValidateFile(validator, c_path, 0);
  xmlSchemaFreeValidCtxt(validator);

  return error_list;
}
/*
 * call-seq:
 *  read_memory(string)
 *
 * Create a new Schema from the contents of +string+. Errors collected
 * while parsing are stored in the schema's @errors; an exception is
 * raised when no schema could be built.
 */
static VALUE read_memory(VALUE klass, VALUE content)
{
  xmlSchemaPtr c_schema;
  xmlSchemaParserCtxtPtr parser = xmlSchemaNewMemParserCtxt(
      (const char *)StringValuePtr(content),
      (int)RSTRING_LEN(content)
  );
  VALUE wrapped;
  VALUE error_list = rb_ary_new();

  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
  xmlSchemaSetParserStructuredErrors(parser,
                                     Nokogiri_error_array_pusher,
                                     (void *)error_list);
#endif

  c_schema = xmlSchemaParse(parser);

  /* Restore the default handler and drop the parser before reporting */
  xmlSetStructuredErrorFunc(NULL, NULL);
  xmlSchemaFreeParserCtxt(parser);

  if(NULL == c_schema) {
    xmlErrorPtr last_error = xmlGetLastError();

    if(!last_error)
      rb_raise(rb_eRuntimeError, "Could not parse document");

    Nokogiri_error_raise(NULL, last_error);
    return Qnil;
  }

  wrapped = Data_Wrap_Struct(klass, 0, dealloc, c_schema);
  rb_iv_set(wrapped, "@errors", error_list);

  return wrapped;
}
/*
 * call-seq:
 *  from_document(doc)
 *
 * Create a new Schema from the Nokogiri::XML::Document +doc+. Errors
 * collected while parsing are stored in the schema's @errors; an
 * exception is raised when no schema could be built.
 */
static VALUE from_document(VALUE klass, VALUE document)
{
  xmlDocPtr doc;
  xmlSchemaParserCtxtPtr ctx;
  xmlSchemaPtr schema;
  VALUE errors;
  VALUE rb_schema;

  Data_Get_Struct(document, xmlDoc, doc);

  /* In case someone passes us a node. ugh. */
  doc = doc->doc;

  ctx = xmlSchemaNewDocParserCtxt(doc);

  errors = rb_ary_new();
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
  xmlSchemaSetParserStructuredErrors(
    ctx,
    Nokogiri_error_array_pusher,
    (void *)errors
  );
#endif

  schema = xmlSchemaParse(ctx);

  xmlSetStructuredErrorFunc(NULL, NULL);
  xmlSchemaFreeParserCtxt(ctx);

  if(NULL == schema) {
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      Nokogiri_error_raise(NULL, error);
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;
  }

  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
  rb_iv_set(rb_schema, "@errors", errors);

  /* The unreachable trailing `return Qnil;` has been dropped. */
  return rb_schema;
}
VALUE cNokogiriXmlSchema;

/*
 * Defines Nokogiri::XML::Schema, stores the class in cNokogiriXmlSchema
 * and registers its constructors and private validation hooks.
 */
void init_xml_schema()
{
  VALUE nokogiri = rb_define_module("Nokogiri");
  VALUE xml = rb_define_module_under(nokogiri, "XML");

  cNokogiriXmlSchema = rb_define_class_under(xml, "Schema", rb_cObject);

  rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, 1);
  rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, 1);

  rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1);
  rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1);
}
nokogiri-1.6.1/ext/nokogiri/xml_namespace.h 0000644 0001750 0001750 00000000446 12261213762 020332 0 ustar boutil boutil #ifndef NOKOGIRI_XML_NAMESPACE
#define NOKOGIRI_XML_NAMESPACE
#include
void init_xml_namespace();
extern VALUE cNokogiriXmlNamespace ;
VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node) ;
VALUE Nokogiri_wrap_xml_namespace2(VALUE document, xmlNsPtr node) ;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_sax_parser.c 0000644 0001750 0001750 00000020442 12261213762 020536 0 ustar boutil boutil #include
int vasprintf (char **strp, const char *fmt, va_list ap);
void vasprintf_free (void *p);
static ID id_start_document, id_end_document, id_start_element, id_end_element;
static ID id_start_element_namespace, id_end_element_namespace;
static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
static ID id_cdata_block, id_cAttribute;
static ID id_processing_instruction;
/* Evaluates to the C string pointer of +str+ when it is truthy (per RTEST),
 * and to NULL otherwise. +str+ is evaluated more than once. */
#define STRING_OR_NULL(str) \
(RTEST(str) ? StringValuePtr(str) : NULL)
/*
 * SAX startDocument callback. Sends xmldecl(version, encoding, standalone)
 * to the parser's @document for non-HTML contexts that saw a declaration,
 * then always sends start_document.
 */
static void start_document(void * ctx)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");
  xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);

  /* standalone == -1 means there was no declaration */
  if(ctxt != NULL && ctxt->html != 1 && ctxt->standalone != -1) {
    VALUE encoding = Qnil;
    VALUE version = Qnil;
    VALUE standalone = Qnil;

    if(ctxt->encoding) encoding = NOKOGIRI_STR_NEW2(ctxt->encoding);
    if(ctxt->version)  version  = NOKOGIRI_STR_NEW2(ctxt->version);

    if(ctxt->standalone == 0) {
      standalone = NOKOGIRI_STR_NEW2("no");
    } else if(ctxt->standalone == 1) {
      standalone = NOKOGIRI_STR_NEW2("yes");
    }

    rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
  }

  rb_funcall(doc, id_start_document, 0);
}
/* SAX endDocument callback: sends end_document to the parser's @document. */
static void end_document(void * ctx)
{
  VALUE parser = NOKOGIRI_SAX_SELF(ctx);
  VALUE receiver = rb_iv_get(parser, "@document");

  rb_funcall(receiver, id_end_document, 0);
}
/*
 * SAX startElement callback. +atts+ is a NULL-terminated list of
 * alternating names and values; it is converted to an array of
 * [name, value] pairs (value is nil when missing) and sent to the
 * parser's @document together with the element name.
 */
static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");
  VALUE attributes = rb_ary_new();

  if(atts) {
    int idx;

    /* Entries come in name/value pairs, hence the stride of two */
    for(idx = 0; atts[idx] != NULL; idx += 2) {
      const xmlChar * raw_value = atts[idx + 1];
      VALUE value = raw_value ? NOKOGIRI_STR_NEW2(raw_value) : Qnil;
      VALUE key = NOKOGIRI_STR_NEW2(atts[idx]);

      rb_ary_push(attributes, rb_ary_new3(2, key, value));
    }
  }

  rb_funcall(doc, id_start_element, 2, NOKOGIRI_STR_NEW2(name), attributes);
}
/* SAX endElement callback: sends end_element(name) to the parser's @document. */
static void end_element(void * ctx, const xmlChar *name)
{
  VALUE parser = NOKOGIRI_SAX_SELF(ctx);
  VALUE receiver = rb_iv_get(parser, "@document");
  VALUE tag = NOKOGIRI_STR_NEW2(name);

  rb_funcall(receiver, id_end_element, 1, tag);
}
/*
 * Converts the flat attribute vector handed to the namespace-aware start
 * callback into a Ruby array of Attribute instances.
 *
 * Each attribute occupies five consecutive slots:
 * [localname, prefix, URI, value, end]; the value text is the byte range
 * between the "value" and "end" pointers.
 */
static VALUE attributes_as_list(
  VALUE self,
  int nb_attributes,
  const xmlChar ** attributes)
{
  VALUE rb_list = rb_ary_new2((long)nb_attributes);
  VALUE rb_attr_class = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
  int index;

  if (!attributes) {
    return rb_list;
  }

  for (index = 0; index < nb_attributes; index++) {
    const xmlChar ** fields = attributes + (index * 5);
    VALUE ctor_args[4];
    VALUE rb_attribute;

    ctor_args[0] = RBSTR_OR_QNIL(fields[0]); /* localname */
    ctor_args[1] = RBSTR_OR_QNIL(fields[1]); /* prefix */
    ctor_args[2] = RBSTR_OR_QNIL(fields[2]); /* URI */
    ctor_args[3] = NOKOGIRI_STR_NEW((const char*)fields[3],
                                    (fields[4] - fields[3])); /* value */

    rb_attribute = rb_class_new_instance(4, ctor_args, rb_attr_class);
    rb_ary_push(rb_list, rb_attribute);
  }

  return rb_list;
}
/*
 * startElementNs callback. Builds the attribute list and a list of
 * [prefix, uri] namespace pairs, then calls +start_element_namespace+ with
 * (localname, attributes, prefix, uri, namespaces).
 */
static void
start_element_ns (
  void * ctx,
  const xmlChar * localname,
  const xmlChar * prefix,
  const xmlChar * uri,
  int nb_namespaces,
  const xmlChar ** namespaces,
  int nb_attributes,
  int nb_defaulted,
  const xmlChar ** attributes)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE handler = rb_iv_get(self, "@document");
  VALUE rb_attributes = attributes_as_list(self, nb_attributes, attributes);
  VALUE rb_namespaces = rb_ary_new2((long)nb_namespaces);
  VALUE rb_localname, rb_prefix, rb_uri;

  if (namespaces) {
    int n;

    /* namespaces holds nb_namespaces consecutive (prefix, uri) pairs */
    for (n = 0; n < nb_namespaces; n++) {
      VALUE rb_ns_prefix = RBSTR_OR_QNIL(namespaces[2 * n]);
      VALUE rb_ns_uri = RBSTR_OR_QNIL(namespaces[2 * n + 1]);

      rb_ary_push(rb_namespaces, rb_ary_new3((long)2, rb_ns_prefix, rb_ns_uri));
    }
  }

  rb_localname = NOKOGIRI_STR_NEW2(localname);
  rb_prefix = RBSTR_OR_QNIL(prefix);
  rb_uri = RBSTR_OR_QNIL(uri);

  rb_funcall(handler, id_start_element_namespace, 5,
             rb_localname, rb_attributes, rb_prefix, rb_uri, rb_namespaces);
}
/**
 * endElementNs callback (borrowed heavily from libxml-ruby): calls
 * +end_element_namespace+ with (localname, prefix, uri); prefix and uri
 * go through RBSTR_OR_QNIL.
 */
static void
end_element_ns (
  void * ctx,
  const xmlChar * localname,
  const xmlChar * prefix,
  const xmlChar * uri)
{
  VALUE handler = rb_iv_get(NOKOGIRI_SAX_SELF(ctx), "@document");
  VALUE rb_localname = NOKOGIRI_STR_NEW2(localname);
  VALUE rb_prefix = RBSTR_OR_QNIL(prefix);
  VALUE rb_uri = RBSTR_OR_QNIL(uri);

  rb_funcall(handler, id_end_element_namespace, 3, rb_localname, rb_prefix, rb_uri);
}
/* characters callback: passes +len+ bytes of +ch+ to +characters+ on @document. */
static void characters_func(void * ctx, const xmlChar * ch, int len)
{
  VALUE handler = rb_iv_get(NOKOGIRI_SAX_SELF(ctx), "@document");
  VALUE rb_text = NOKOGIRI_STR_NEW(ch, len);

  rb_funcall(handler, id_characters, 1, rb_text);
}
/* comment callback: passes the comment text to +comment+ on @document. */
static void comment_func(void * ctx, const xmlChar * value)
{
  VALUE handler = rb_iv_get(NOKOGIRI_SAX_SELF(ctx), "@document");
  VALUE rb_text = NOKOGIRI_STR_NEW2(value);

  rb_funcall(handler, id_comment, 1, rb_text);
}
/*
 * warning callback: formats the printf-style message and passes it to
 * +warning+ on @document.
 *
 * If vasprintf fails, the buffer pointer is not usable, so the raw format
 * string is reported instead of reading (and freeing) an indeterminate
 * pointer.
 */
static void warning_func(void * ctx, const char *msg, ...)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");
  char * message = NULL;
  VALUE ruby_message;
  int written;
  va_list args;

  va_start(args, msg);
  written = vasprintf(&message, msg, args);
  va_end(args);

  if (written < 0 || message == NULL) {
    /* formatting failed: fall back to the unformatted message */
    ruby_message = NOKOGIRI_STR_NEW2(msg);
  } else {
    ruby_message = NOKOGIRI_STR_NEW2(message);
    vasprintf_free(message);
  }

  rb_funcall(doc, id_warning, 1, ruby_message);
}
/*
 * error callback: formats the printf-style message and passes it to
 * +error+ on @document.
 *
 * If vasprintf fails, the buffer pointer is not usable, so the raw format
 * string is reported instead of reading (and freeing) an indeterminate
 * pointer.
 */
static void error_func(void * ctx, const char *msg, ...)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");
  char * message = NULL;
  VALUE ruby_message;
  int written;
  va_list args;

  va_start(args, msg);
  written = vasprintf(&message, msg, args);
  va_end(args);

  if (written < 0 || message == NULL) {
    /* formatting failed: fall back to the unformatted message */
    ruby_message = NOKOGIRI_STR_NEW2(msg);
  } else {
    ruby_message = NOKOGIRI_STR_NEW2(message);
    vasprintf_free(message);
  }

  rb_funcall(doc, id_error, 1, ruby_message);
}
/* cdataBlock callback: passes +len+ bytes of +value+ to +cdata_block+ on @document. */
static void cdata_block(void * ctx, const xmlChar * value, int len)
{
  VALUE handler = rb_iv_get(NOKOGIRI_SAX_SELF(ctx), "@document");
  VALUE rb_text = NOKOGIRI_STR_NEW(value, len);

  rb_funcall(handler, id_cdata_block, 1, rb_text);
}
/*
 * processingInstruction callback: calls +processing_instruction+ with the
 * target name and its content (nil when +content+ is NULL).
 */
static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content)
{
  VALUE handler = rb_iv_get(NOKOGIRI_SAX_SELF(ctx), "@document");
  VALUE rb_content = Qnil;
  VALUE rb_name;

  if (content) {
    rb_content = NOKOGIRI_STR_NEW2(content);
  }
  rb_name = NOKOGIRI_STR_NEW2(name);

  rb_funcall(handler, id_processing_instruction, 2, rb_name, rb_content);
}
/*
 * Free function registered by allocate(): releases the SAX handler table
 * with free(), matching the calloc() that created it.
 */
static void deallocate(xmlSAXHandlerPtr handler)
{
  NOKOGIRI_DEBUG_START(handler);

  free(handler);

  NOKOGIRI_DEBUG_END(handler);
}
/*
 * Allocator for Nokogiri::XML::SAX::Parser: creates a zeroed SAX handler
 * table, installs this file's callbacks, and wraps it so that deallocate()
 * frees it.
 *
 * Raises NoMemoryError (via rb_memerror) when the handler table cannot be
 * allocated, instead of dereferencing a NULL pointer.
 */
static VALUE allocate(VALUE klass)
{
  xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));

  if (handler == NULL) {
    rb_memerror();
  }

  xmlSetStructuredErrorFunc(NULL, NULL);

  handler->startDocument = start_document;
  handler->endDocument = end_document;
  handler->startElement = start_element;
  handler->endElement = end_element;
  handler->startElementNs = start_element_ns;
  handler->endElementNs = end_element_ns;
  handler->characters = characters_func;
  handler->comment = comment_func;
  handler->warning = warning_func;
  handler->error = error_func;
  handler->cdataBlock = cdata_block;
  handler->processingInstruction = processing_instruction;
  handler->initialized = XML_SAX2_MAGIC;

  return Data_Wrap_Struct(klass, NULL, deallocate, handler);
}
VALUE cNokogiriXmlSaxParser ;

/*
 * Defines Nokogiri::XML::SAX::Parser, registers its allocator, and interns
 * the method and constant names used by the callbacks in this file.
 */
void init_xml_sax_parser()
{
  VALUE nokogiri_mod = rb_define_module("Nokogiri");
  VALUE xml_mod = rb_define_module_under(nokogiri_mod, "XML");
  VALUE sax_mod = rb_define_module_under(xml_mod, "SAX");
  VALUE parser_class = rb_define_class_under(sax_mod, "Parser", rb_cObject);

  cNokogiriXmlSaxParser = parser_class;

  rb_define_alloc_func(parser_class, allocate);

  id_start_document          = rb_intern("start_document");
  id_end_document            = rb_intern("end_document");
  id_start_element           = rb_intern("start_element");
  id_end_element             = rb_intern("end_element");
  id_comment                 = rb_intern("comment");
  id_characters              = rb_intern("characters");
  id_xmldecl                 = rb_intern("xmldecl");
  id_error                   = rb_intern("error");
  id_warning                 = rb_intern("warning");
  id_cdata_block             = rb_intern("cdata_block");
  id_cAttribute              = rb_intern("Attribute");
  id_start_element_namespace = rb_intern("start_element_namespace");
  id_end_element_namespace   = rb_intern("end_element_namespace");
  id_processing_instruction  = rb_intern("processing_instruction");
}
nokogiri-1.6.1/ext/nokogiri/xml_text.h 0000644 0001750 0001750 00000000212 12261213762 017351 0 ustar boutil boutil #ifndef NOKOGIRI_XML_TEXT
#define NOKOGIRI_XML_TEXT
#include
void init_xml_text();
extern VALUE cNokogiriXmlText ;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_dtd.c 0000644 0001750 0001750 00000010031 12261213762 017133 0 ustar boutil boutil #include
/*
 * xmlHashScan callback: builds a Notation instance from the libxml2
 * notation in +payload+ (name, PublicID, SystemID; nil for NULL fields)
 * and stores it in the Ruby hash passed as +data+, keyed by +name+.
 */
static void notation_copier(void *payload, void *data, xmlChar *name)
{
  xmlNotationPtr c_notation = (xmlNotationPtr)payload;
  VALUE rb_hash = (VALUE)data;
  VALUE notation_class = rb_const_get(mNokogiriXml, rb_intern("Notation"));
  VALUE ctor_args[3];
  VALUE rb_notation;
  VALUE rb_key;

  ctor_args[0] = Qnil;
  ctor_args[1] = Qnil;
  ctor_args[2] = Qnil;

  if (c_notation->name) {
    ctor_args[0] = NOKOGIRI_STR_NEW2(c_notation->name);
  }
  if (c_notation->PublicID) {
    ctor_args[1] = NOKOGIRI_STR_NEW2(c_notation->PublicID);
  }
  if (c_notation->SystemID) {
    ctor_args[2] = NOKOGIRI_STR_NEW2(c_notation->SystemID);
  }

  rb_notation = rb_class_new_instance(3, ctor_args, notation_class);
  rb_key = NOKOGIRI_STR_NEW2(name);
  rb_hash_aset(rb_hash, rb_key, rb_notation);
}
/*
 * xmlHashScan callback: wraps the node in +_payload+ as a Ruby object and
 * stores it in the Ruby hash passed as +data+, keyed by +name+.
 */
static void element_copier(void *_payload, void *data, xmlChar *name)
{
  VALUE rb_hash = (VALUE)data;
  VALUE rb_node = Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)_payload);
  VALUE rb_key = NOKOGIRI_STR_NEW2(name);

  rb_hash_aset(rb_hash, rb_key, rb_node);
}
/*
 * call-seq:
 *   entities
 *
 * Get a hash of the entities for this DTD. Returns +nil+ when the DTD has
 * no entity table.
 */
static VALUE entities(VALUE self)
{
  xmlDtdPtr c_dtd;
  VALUE rb_entities = Qnil;

  Data_Get_Struct(self, xmlDtd, c_dtd);

  if (c_dtd->entities) {
    rb_entities = rb_hash_new();
    xmlHashScan((xmlHashTablePtr)c_dtd->entities, element_copier, (void *)rb_entities);
  }

  return rb_entities;
}
/*
 * call-seq:
 *   notations
 *
 * Get a hash of the notations for this DTD. Returns +nil+ when the DTD has
 * no notation table.
 */
static VALUE notations(VALUE self)
{
  xmlDtdPtr c_dtd;
  VALUE rb_notations = Qnil;

  Data_Get_Struct(self, xmlDtd, c_dtd);

  if (c_dtd->notations) {
    rb_notations = rb_hash_new();
    xmlHashScan((xmlHashTablePtr)c_dtd->notations, notation_copier, (void *)rb_notations);
  }

  return rb_notations;
}
/*
 * call-seq:
 *   attributes
 *
 * Get a hash of the attributes for this DTD. Returns an empty hash when
 * the DTD has no attribute table.
 */
static VALUE attributes(VALUE self)
{
  xmlDtdPtr c_dtd;
  VALUE rb_attributes;

  Data_Get_Struct(self, xmlDtd, c_dtd);

  rb_attributes = rb_hash_new();
  if (c_dtd->attributes) {
    xmlHashScan((xmlHashTablePtr)c_dtd->attributes, element_copier, (void *)rb_attributes);
  }

  return rb_attributes;
}
/*
 * call-seq:
 *   elements
 *
 * Get a hash of the elements for this DTD. Returns +nil+ when the DTD has
 * no element table.
 */
static VALUE elements(VALUE self)
{
  xmlDtdPtr c_dtd;
  VALUE rb_elements = Qnil;

  Data_Get_Struct(self, xmlDtd, c_dtd);

  if (c_dtd->elements) {
    rb_elements = rb_hash_new();
    xmlHashScan((xmlHashTablePtr)c_dtd->elements, element_copier, (void *)rb_elements);
  }

  return rb_elements;
}
/*
 * call-seq:
 *   validate(document)
 *
 * Validate +document+ against this DTD, returning a list of errors.
 */
static VALUE validate(VALUE self, VALUE document)
{
  xmlDtdPtr c_dtd;
  xmlDocPtr c_doc;
  xmlValidCtxtPtr valid_ctxt;
  VALUE rb_errors;

  Data_Get_Struct(self, xmlDtd, c_dtd);
  Data_Get_Struct(document, xmlDoc, c_doc);

  rb_errors = rb_ary_new();
  valid_ctxt = xmlNewValidCtxt();

  /* route structured errors into rb_errors for the duration of the check */
  xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
  xmlValidateDtd(valid_ctxt, c_doc, c_dtd);
  xmlSetStructuredErrorFunc(NULL, NULL);

  xmlFreeValidCtxt(valid_ctxt);

  return rb_errors;
}
/*
 * call-seq:
 *   system_id
 *
 * Get the System ID for this DTD, or +nil+ when none is set.
 */
static VALUE system_id(VALUE self)
{
  xmlDtdPtr c_dtd;

  Data_Get_Struct(self, xmlDtd, c_dtd);

  if (c_dtd->SystemID) {
    return NOKOGIRI_STR_NEW2(c_dtd->SystemID);
  }
  return Qnil;
}
/*
 * call-seq:
 *   external_id
 *
 * Get the External ID for this DTD, or +nil+ when none is set.
 */
static VALUE external_id(VALUE self)
{
  xmlDtdPtr c_dtd;

  Data_Get_Struct(self, xmlDtd, c_dtd);

  if (c_dtd->ExternalID) {
    return NOKOGIRI_STR_NEW2(c_dtd->ExternalID);
  }
  return Qnil;
}
VALUE cNokogiriXmlDtd;

/* Defines Nokogiri::XML::DTD (a subclass of Node) and its instance methods. */
void init_xml_dtd()
{
  VALUE nokogiri_mod = rb_define_module("Nokogiri");
  VALUE xml_mod = rb_define_module_under(nokogiri_mod, "XML");
  VALUE node_class = rb_define_class_under(xml_mod, "Node", rb_cObject);

  /*
   * Nokogiri::XML::DTD wraps DTD nodes in an XML document
   */
  VALUE dtd_class = rb_define_class_under(xml_mod, "DTD", node_class);

  cNokogiriXmlDtd = dtd_class;

  rb_define_method(dtd_class, "notations", notations, 0);
  rb_define_method(dtd_class, "elements", elements, 0);
  rb_define_method(dtd_class, "entities", entities, 0);
  rb_define_method(dtd_class, "validate", validate, 1);
  rb_define_method(dtd_class, "attributes", attributes, 0);
  rb_define_method(dtd_class, "system_id", system_id, 0);
  rb_define_method(dtd_class, "external_id", external_id, 0);
}
nokogiri-1.6.1/ext/nokogiri/xml_io.h 0000644 0001750 0001750 00000000401 12261213762 016774 0 ustar boutil boutil #ifndef NOKOGIRI_XML_IO
#define NOKOGIRI_XML_IO
#include
int io_read_callback(void * ctx, char * buffer, int len);
int io_write_callback(void * ctx, char * buffer, int len);
int io_close_callback(void * ctx);
void init_nokogiri_io();
#endif
nokogiri-1.6.1/ext/nokogiri/xml_namespace.c 0000644 0001750 0001750 00000003002 12261213762 020314 0 ustar boutil boutil #include
VALUE cNokogiriXmlNamespace ;
/*
 * call-seq:
 *   prefix
 *
 * Get the prefix for this namespace. Returns +nil+ if there is no prefix.
 */
static VALUE prefix(VALUE self)
{
  xmlNsPtr c_ns;

  Data_Get_Struct(self, xmlNs, c_ns);

  if (c_ns->prefix) {
    return NOKOGIRI_STR_NEW2(c_ns->prefix);
  }
  return Qnil;
}
/*
 * call-seq:
 *   href
 *
 * Get the href for this namespace. Returns +nil+ if there is no href.
 */
static VALUE href(VALUE self)
{
  xmlNsPtr c_ns;

  Data_Get_Struct(self, xmlNs, c_ns);

  if (c_ns->href) {
    return NOKOGIRI_STR_NEW2(c_ns->href);
  }
  return Qnil;
}
/*
 * Returns the Ruby Namespace object for +node+, creating it on first use.
 *
 * A new wrapper (no mark or free function) is pushed onto the owning
 * document's @node_cache, gets its @document set, and is remembered in
 * node->_private so later calls return the same object.
 */
VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node)
{
  VALUE rb_ns;
  VALUE rb_document;

  assert(doc->_private);

  if (node->_private) {
    return (VALUE)node->_private;
  }

  rb_ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node);
  rb_document = DOC_RUBY_OBJECT(doc);

  rb_ary_push(rb_iv_get(rb_document, "@node_cache"), rb_ns);
  rb_iv_set(rb_ns, "@document", rb_document);

  node->_private = (void *)rb_ns;

  return rb_ns;
}
/*
 * Same as Nokogiri_wrap_xml_namespace, but takes the Ruby document object
 * and unwraps its xmlDoc first.
 */
VALUE Nokogiri_wrap_xml_namespace2(VALUE document, xmlNsPtr node)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(document, xmlDoc, c_doc);

  return Nokogiri_wrap_xml_namespace(c_doc, node);
}
/* Defines Nokogiri::XML::Namespace with its +prefix+ and +href+ readers. */
void init_xml_namespace()
{
  VALUE nokogiri_mod = rb_define_module("Nokogiri");
  VALUE xml_mod = rb_define_module_under(nokogiri_mod, "XML");
  VALUE namespace_class = rb_define_class_under(xml_mod, "Namespace", rb_cObject);

  cNokogiriXmlNamespace = namespace_class;

  rb_define_method(namespace_class, "prefix", prefix, 0);
  rb_define_method(namespace_class, "href", href, 0);
}
nokogiri-1.6.1/ext/nokogiri/xml_cdata.c 0000644 0001750 0001750 00000002450 12261213762 017442 0 ustar boutil boutil #include
/*
 * call-seq:
 *   new(document, content)
 *
 * Create a new CDATA element on the +document+ with +content+.
 * A +nil+ content creates the node with no data. Extra arguments are
 * accepted and forwarded to +initialize+; the new node is yielded when a
 * block is given.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  xmlDocPtr xml_doc;
  xmlNodePtr node;
  VALUE doc;
  VALUE content;
  VALUE rest;
  VALUE rb_node;
  const xmlChar *c_content = NULL;
  int c_content_len = 0;

  rb_scan_args(argc, argv, "2*", &doc, &content, &rest);

  Data_Get_Struct(doc, xmlDoc, xml_doc);

  if (!NIL_P(content)) {
    /*
     * Coerce to a String first and only then read the length. Doing both
     * inside one argument list leaves the evaluation order unspecified, so
     * the length could be read from an object that is not yet a String.
     */
    c_content = (const xmlChar *)StringValuePtr(content);
    c_content_len = (int)RSTRING_LEN(content);
  }

  node = xmlNewCDataBlock(xml_doc->doc, c_content, c_content_len);

  nokogiri_root_node(node);

  rb_node = Nokogiri_wrap_xml_node(klass, node);
  rb_obj_call_init(rb_node, argc, argv);

  if(rb_block_given_p()) rb_yield(rb_node);

  return rb_node;
}
VALUE cNokogiriXmlCData;

/*
 * Defines Nokogiri::XML::CDATA (Node > CharacterData > Text > CDATA) and
 * its +new+ singleton method.
 */
void init_xml_cdata()
{
  VALUE nokogiri_mod = rb_define_module("Nokogiri");
  VALUE xml_mod = rb_define_module_under(nokogiri_mod, "XML");
  VALUE node_class = rb_define_class_under(xml_mod, "Node", rb_cObject);
  VALUE char_data_class = rb_define_class_under(xml_mod, "CharacterData", node_class);
  VALUE text_class = rb_define_class_under(xml_mod, "Text", char_data_class);

  /*
   * CData represents a CData node in an xml document.
   */
  VALUE cdata_class = rb_define_class_under(xml_mod, "CDATA", text_class);

  cNokogiriXmlCData = cdata_class;

  rb_define_singleton_method(cdata_class, "new", new, -1);
}
nokogiri-1.6.1/ext/nokogiri/html_sax_parser_context.c 0000644 0001750 0001750 00000006003 12261213762 022443 0 ustar boutil boutil #include
VALUE cNokogiriHtmlSaxParserContext ;
/*
 * Free function for the wrapped HTML parser context.
 *
 * ctxt->sax is cleared before htmlFreeParserCtxt is called: parse_with()
 * in this file points it at the handler wrapped by the Ruby parser object,
 * so it is not this function's to release.
 */
static void deallocate(xmlParserCtxtPtr ctxt)
{
  /* the debug macros take the pointer being released; no `handler` exists here */
  NOKOGIRI_DEBUG_START(ctxt);

  ctxt->sax = NULL;

  htmlFreeParserCtxt(ctxt);

  NOKOGIRI_DEBUG_END(ctxt);
}
/*
 * Creates an HTML parser context over the in-memory string +data+,
 * optionally switching it to +encoding+.
 *
 * Raises ArgumentError when +data+ is nil, and RuntimeError when +data+ is
 * empty, when the context cannot be created, or when +encoding+ is not
 * supported.
 */
static VALUE
parse_memory(VALUE klass, VALUE data, VALUE encoding)
{
  htmlParserCtxtPtr ctxt;

  if (NIL_P(data))
    rb_raise(rb_eArgError, "data cannot be nil");

  /* RSTRING_LEN is only valid on a String, so coerce before reading it */
  StringValue(data);

  if (!(int)RSTRING_LEN(data))
    rb_raise(rb_eRuntimeError, "data cannot be empty");

  ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
                                    (int)RSTRING_LEN(data));
  if (ctxt == NULL)
    rb_raise(rb_eRuntimeError, "couldn't create a parser context");

  /* drop the SAX handler that came with the context; parse_with assigns its own */
  if (ctxt->sax) {
    xmlFree(ctxt->sax);
    ctxt->sax = NULL;
  }

  if (RTEST(encoding)) {
    xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
    if (enc != NULL) {
      xmlSwitchToEncoding(ctxt, enc);
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
        /* the context is not wrapped yet, so release it before raising */
        htmlFreeParserCtxt(ctxt);
        rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
                 StringValuePtr(encoding));
      }
    }
  }

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
/*
 * Creates an HTML parser context that reads from the file +filename+ using
 * +encoding+.
 *
 * Raises RuntimeError when libxml2 cannot create the context, rather than
 * wrapping a NULL pointer that parse_with would later dereference.
 */
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
{
  htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
      StringValuePtr(filename),
      StringValuePtr(encoding)
  );

  if (ctxt == NULL) {
    rb_raise(rb_eRuntimeError, "couldn't create a parser context for %s",
             StringValuePtr(filename));
  }

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
/*
 * Body function for rb_ensure in parse_with: runs the HTML parser on the
 * context smuggled through +ctxt_val+. Always returns nil.
 */
static VALUE
parse_doc(VALUE ctxt_val)
{
  htmlParseDocument((htmlParserCtxtPtr)ctxt_val);

  return Qnil;
}
/*
 * Ensure function for rb_ensure in parse_with: frees any document the
 * parse produced and the SAX tuple stored in userData.
 *
 * Both pointers are reset after being freed so that a later parse_with on
 * the same context cannot free them a second time.
 */
static VALUE
parse_doc_finalize(VALUE ctxt_val)
{
  htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;

  if (ctxt->myDoc) {
    xmlFreeDoc(ctxt->myDoc);
    ctxt->myDoc = NULL;
  }

  NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
  ctxt->userData = NULL;

  return Qnil;
}
/*
 * Parses this context using the callbacks of +sax_handler+, which must be
 * a Nokogiri::XML::SAX::Parser (ArgumentError otherwise). Returns self.
 */
static VALUE
parse_with(VALUE self, VALUE sax_handler)
{
  htmlParserCtxtPtr c_ctxt;
  htmlSAXHandlerPtr c_sax;

  if (rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser) == Qfalse) {
    rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
  }

  Data_Get_Struct(self, htmlParserCtxt, c_ctxt);
  Data_Get_Struct(sax_handler, htmlSAXHandler, c_sax);

  /* Free the sax handler since we'll assign our own */
  if (c_ctxt->sax != NULL &&
      c_ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
    xmlFree(c_ctxt->sax);
  }

  c_ctxt->sax = c_sax;
  c_ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(c_ctxt, sax_handler);

  /* parse_doc_finalize runs whether or not parsing raises */
  rb_ensure(parse_doc, (VALUE)c_ctxt, parse_doc_finalize, (VALUE)c_ctxt);

  return self;
}
/*
 * Defines Nokogiri::HTML::SAX::ParserContext as a subclass of
 * Nokogiri::XML::SAX::ParserContext, with the +memory+ and +file+
 * constructors and the +parse_with+ instance method.
 */
void init_html_sax_parser_context()
{
  VALUE nokogiri_mod = rb_define_module("Nokogiri");
  VALUE xml_mod = rb_define_module_under(nokogiri_mod, "XML");
  VALUE html_mod = rb_define_module_under(nokogiri_mod, "HTML");
  VALUE xml_sax_mod = rb_define_module_under(xml_mod, "SAX");
  VALUE html_sax_mod = rb_define_module_under(html_mod, "SAX");
  VALUE xml_context_class = rb_define_class_under(xml_sax_mod, "ParserContext", rb_cObject);
  VALUE html_context_class = rb_define_class_under(html_sax_mod, "ParserContext", xml_context_class);

  cNokogiriHtmlSaxParserContext = html_context_class;

  rb_define_singleton_method(html_context_class, "memory", parse_memory, 2);
  rb_define_singleton_method(html_context_class, "file", parse_file, 2);

  rb_define_method(html_context_class, "parse_with", parse_with, 1);
}
nokogiri-1.6.1/ext/nokogiri/xml_sax_parser.h 0000644 0001750 0001750 00000001511 12261213762 020537 0 ustar boutil boutil #ifndef NOKOGIRI_XML_SAX_PARSER
#define NOKOGIRI_XML_SAX_PARSER
#include
void init_xml_sax_parser();
extern VALUE cNokogiriXmlSaxParser ;
/* Pairs a libxml2 parser context with a Ruby object; passed to the SAX callbacks as their user data. */
typedef struct _nokogiriSAXTuple {
xmlParserCtxtPtr ctxt;
VALUE self;
} nokogiriSAXTuple;
typedef nokogiriSAXTuple * nokogiriSAXTuplePtr;
/* Read the Ruby object out of a tuple given as an untyped pointer. */
#define NOKOGIRI_SAX_SELF(_ctxt) \
((nokogiriSAXTuplePtr)(_ctxt))->self
/* Read the parser context out of a tuple given as an untyped pointer. */
#define NOKOGIRI_SAX_CTXT(_ctxt) \
((nokogiriSAXTuplePtr)(_ctxt))->ctxt
/* Allocate a tuple; thin wrapper over nokogiri_sax_tuple_new(). */
#define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) \
nokogiri_sax_tuple_new(_ctxt, _self)
/*
 * Allocates a tuple holding +ctxt+ and +self+ with malloc(); release it
 * with NOKOGIRI_SAX_TUPLE_DESTROY.
 *
 * Raises NoMemoryError (via rb_memerror) when the allocation fails,
 * instead of writing through a NULL pointer.
 */
static inline nokogiriSAXTuplePtr
nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
{
  nokogiriSAXTuplePtr tuple = malloc(sizeof(nokogiriSAXTuple));

  if (tuple == NULL) {
    rb_memerror();
  }

  tuple->self = self;
  tuple->ctxt = ctxt;
  return tuple;
}
/*
 * Release a tuple created by NOKOGIRI_SAX_TUPLE_NEW.
 * The body must not end in a line continuation: a trailing backslash
 * splices whatever line follows into the macro.
 */
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) \
  free(_tuple)
#endif
nokogiri-1.6.1/ext/nokogiri/xml_xpath_context.h 0000644 0001750 0001750 00000000467 12261213762 021271 0 ustar boutil boutil #ifndef NOKOGIRI_XML_XPATH_CONTEXT
#define NOKOGIRI_XML_XPATH_CONTEXT
#include
void init_xml_xpath_context();
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char* function_name) ;
extern VALUE cNokogiriXmlXpathContext;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_reader.c 0000644 0001750 0001750 00000035223 12261213762 017634 0 ustar boutil boutil #include
/* Free function for wrapped readers: releases the xmlTextReader. */
static void dealloc(xmlTextReaderPtr reader)
{
  NOKOGIRI_DEBUG_START(reader);

  xmlFreeTextReader(reader);

  NOKOGIRI_DEBUG_END(reader);
}
/*
 * Returns 1 when the reader's current node is an element with at least one
 * property or namespace definition, 0 otherwise.
 *
 * This implementation of xmlTextReaderHasAttributes explicitly includes
 * namespaces and properties, because some earlier versions ignore
 * namespaces.
 */
static int has_attributes(xmlTextReaderPtr reader)
{
  xmlNodePtr current = xmlTextReaderCurrentNode(reader);

  if (current == NULL || current->type != XML_ELEMENT_NODE) {
    return 0;
  }

  return (current->properties != NULL || current->nsDef != NULL) ? 1 : 0;
}
/*
 * Adds one entry per namespace definition on the element +node+ to
 * +attr_hash+. The key is XMLNS_PREFIX alone for a definition without a
 * prefix and XMLNS_PREFIX ":" prefix otherwise; the value is the href, or
 * nil when there is none. Non-element nodes are ignored.
 *
 * Keys that do not fit the static buffer are built in a temporary heap
 * buffer; NoMemoryError is raised (via rb_memerror) if that allocation
 * fails, instead of formatting into a NULL pointer.
 */
static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
{
  xmlNsPtr ns;
  static char buffer[XMLNS_BUFFER_LEN] ;
  char *key ;
  size_t keylen ;

  if (node->type != XML_ELEMENT_NODE) return ;

  for (ns = node->nsDef; ns != NULL; ns = ns->next) {

    keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
    if (keylen > XMLNS_BUFFER_LEN) {
      key = (char*)malloc(keylen) ;
      if (key == NULL) {
        rb_memerror();
      }
    } else {
      key = buffer ;
    }

    if (ns->prefix) {
      sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
    } else {
      sprintf(key, "%s", XMLNS_PREFIX);
    }

    rb_hash_aset(attr_hash,
        NOKOGIRI_STR_NEW2(key),
        (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
    );

    /* only the heap buffer needs releasing */
    if (key != buffer) {
      free(key);
    }
  }
}
/*
 * call-seq:
 *   default?
 *
 * Was an attribute generated from the default value in the DTD or schema?
 * Returns +nil+ when libxml2 reports neither 0 nor 1.
 */
static VALUE default_eh(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  switch (xmlTextReaderIsDefault(reader)) {
    case 0:
      return Qfalse;
    case 1:
      return Qtrue;
    default:
      return Qnil;
  }
}
/*
 * call-seq:
 *   value?
 *
 * Does this node have a text value?
 * Returns +nil+ when libxml2 reports neither 0 nor 1.
 */
static VALUE value_eh(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  switch (xmlTextReaderHasValue(reader)) {
    case 0:
      return Qfalse;
    case 1:
      return Qtrue;
    default:
      return Qnil;
  }
}
/*
 * call-seq:
 *   attributes?
 *
 * Does this node have attributes?
 */
static VALUE attributes_eh(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  switch (has_attributes(reader)) {
    case 0:
      return Qfalse;
    case 1:
      return Qtrue;
    default:
      return Qnil;
  }
}
/*
 * call-seq:
 *   namespaces
 *
 * Get a hash of namespaces for this Node. The hash is empty when the
 * current node has no attributes; +nil+ is returned when the node cannot
 * be expanded.
 */
static VALUE namespaces(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlNodePtr expanded;
  VALUE rb_namespaces;

  Data_Get_Struct(self, xmlTextReader, reader);

  rb_namespaces = rb_hash_new();

  if (has_attributes(reader)) {
    expanded = xmlTextReaderExpand(reader);
    if (expanded == NULL) {
      return Qnil;
    }
    Nokogiri_xml_node_namespaces(expanded, rb_namespaces);
  }

  return rb_namespaces;
}
/*
 * call-seq:
 *   attribute_nodes
 *
 * Get a list of attributes for this Node. The list is empty when the
 * current node has no attributes; +nil+ is returned when the node cannot
 * be expanded.
 */
static VALUE attribute_nodes(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlNodePtr expanded;
  VALUE rb_attributes;

  Data_Get_Struct(self, xmlTextReader, reader);

  rb_attributes = rb_ary_new();

  if (has_attributes(reader)) {
    expanded = xmlTextReaderExpand(reader);
    if (expanded == NULL) {
      return Qnil;
    }
    Nokogiri_xml_node_properties(expanded, rb_attributes);
  }

  return rb_attributes;
}
/*
 * call-seq:
 *   attribute_at(index)
 *
 * Get the value of attribute at +index+. Returns +nil+ for a +nil+ index
 * or when there is no such attribute.
 */
static VALUE attribute_at(VALUE self, VALUE index)
{
  xmlTextReaderPtr reader;
  xmlChar *c_value;
  VALUE rb_result = Qnil;

  Data_Get_Struct(self, xmlTextReader, reader);

  if (!NIL_P(index)) {
    index = rb_Integer(index);

    c_value = xmlTextReaderGetAttributeNo(reader, (int)NUM2INT(index));
    if (c_value != NULL) {
      /* copy into a Ruby string, then release the libxml2 buffer */
      rb_result = NOKOGIRI_STR_NEW2(c_value);
      xmlFree(c_value);
    }
  }

  return rb_result;
}
/*
 * call-seq:
 *   attribute(name)
 *
 * Get the value of attribute named +name+. Returns +nil+ for a +nil+ name
 * or when nothing is found.
 */
static VALUE reader_attribute(VALUE self, VALUE name)
{
  xmlTextReaderPtr reader;
  xmlChar *c_value;
  VALUE rb_result;

  Data_Get_Struct(self, xmlTextReader, reader);

  if (NIL_P(name)) {
    return Qnil;
  }
  name = StringValue(name);

  c_value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));

  if (c_value == NULL) {
    /* this section is an attempt to workaround older versions of libxml that
       don't handle namespaces properly in all attribute-and-friends functions:
       split the qualified name and fall back to a namespace lookup */
    xmlChar *ns_prefix = NULL;
    xmlChar *local = xmlSplitQName2((xmlChar*)StringValuePtr(name), &ns_prefix);

    if (local == NULL) {
      c_value = xmlTextReaderLookupNamespace(reader, ns_prefix);
    } else {
      c_value = xmlTextReaderLookupNamespace(reader, local);
      xmlFree(local);
    }
    xmlFree(ns_prefix);
  }

  if (c_value == NULL) {
    return Qnil;
  }

  rb_result = NOKOGIRI_STR_NEW2(c_value);
  xmlFree(c_value);
  return rb_result;
}
/*
 * call-seq:
 *   attribute_count
 *
 * Get the number of attributes for the current node, or +nil+ when
 * libxml2 reports -1.
 */
static VALUE attribute_count(VALUE self)
{
  xmlTextReaderPtr reader;
  int c_count;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_count = xmlTextReaderAttributeCount(reader);

  return (c_count == -1) ? Qnil : INT2NUM((long)c_count);
}
/*
 * call-seq:
 *   depth
 *
 * Get the depth of the node, or +nil+ when libxml2 reports -1.
 */
static VALUE depth(VALUE self)
{
  xmlTextReaderPtr reader;
  int c_depth;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_depth = xmlTextReaderDepth(reader);

  return (c_depth == -1) ? Qnil : INT2NUM((long)c_depth);
}
/*
 * call-seq:
 *   xml_version
 *
 * Get the XML version of the document being read, or +nil+ when it is not
 * available.
 */
static VALUE xml_version(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *c_version;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_version = (const char *)xmlTextReaderConstXmlVersion(reader);
  if (c_version != NULL) {
    return NOKOGIRI_STR_NEW2(c_version);
  }
  return Qnil;
}
/*
 * call-seq:
 *   lang
 *
 * Get the xml:lang scope within which the node resides, or +nil+ when
 * there is none.
 */
static VALUE lang(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *c_lang;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_lang = (const char *)xmlTextReaderConstXmlLang(reader);
  if (c_lang != NULL) {
    return NOKOGIRI_STR_NEW2(c_lang);
  }
  return Qnil;
}
/*
 * call-seq:
 *   value
 *
 * Get the text value of the node if present. Returns a utf-8 encoded
 * string, or +nil+ when the node has no value.
 */
static VALUE value(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *c_value;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_value = (const char *)xmlTextReaderConstValue(reader);
  if (c_value != NULL) {
    return NOKOGIRI_STR_NEW2(c_value);
  }
  return Qnil;
}
/*
 * call-seq:
 *   prefix
 *
 * Get the shorthand reference to the namespace associated with the node,
 * or +nil+ when there is none.
 */
static VALUE prefix(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *c_prefix;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_prefix = (const char *)xmlTextReaderConstPrefix(reader);
  if (c_prefix != NULL) {
    return NOKOGIRI_STR_NEW2(c_prefix);
  }
  return Qnil;
}
/*
 * call-seq:
 *   namespace_uri
 *
 * Get the URI defining the namespace associated with the node, or +nil+
 * when there is none.
 */
static VALUE namespace_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *c_uri;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
  if (c_uri != NULL) {
    return NOKOGIRI_STR_NEW2(c_uri);
  }
  return Qnil;
}
/*
 * call-seq:
 *   local_name
 *
 * Get the local name of the node, or +nil+ when it is not available.
 */
static VALUE local_name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *c_name;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_name = (const char *)xmlTextReaderConstLocalName(reader);
  if (c_name != NULL) {
    return NOKOGIRI_STR_NEW2(c_name);
  }
  return Qnil;
}
/*
 * call-seq:
 *   name
 *
 * Get the name of the node. Returns a utf-8 encoded string, or +nil+ when
 * it is not available.
 */
static VALUE name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *c_name;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_name = (const char *)xmlTextReaderConstName(reader);
  if (c_name != NULL) {
    return NOKOGIRI_STR_NEW2(c_name);
  }
  return Qnil;
}
/*
 * call-seq:
 *   base_uri
 *
 * Get the xml:base of the node, or +nil+ when it is not available.
 */
static VALUE base_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlChar * c_base_uri;
  VALUE rb_base_uri;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_base_uri = xmlTextReaderBaseUri(reader);
  if (c_base_uri == NULL) return Qnil;

  /*
   * Unlike the xmlTextReaderConst* accessors, xmlTextReaderBaseUri hands
   * back a string owned by the caller, so release it once it has been
   * copied into a Ruby string.
   */
  rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
  xmlFree(c_base_uri);

  return rb_base_uri;
}
/*
 * call-seq:
 *   state
 *
 * Get the state of the reader as an integer.
 */
static VALUE state(VALUE self)
{
  xmlTextReaderPtr reader;
  int c_state;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_state = xmlTextReaderReadState(reader);
  return INT2NUM((long)c_state);
}
/*
 * call-seq:
 *   node_type
 *
 * Get the type of the reader's current node as an integer.
 */
static VALUE node_type(VALUE self)
{
  xmlTextReaderPtr reader;
  int c_type;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_type = xmlTextReaderNodeType(reader);
  return INT2NUM((long)c_type);
}
/*
 * call-seq:
 *   read
 *
 * Move the Reader forward through the XML document. Returns self when a
 * node was read and +nil+ when there is nothing more to read; raises
 * otherwise. Errors reported during the read are appended to +errors+.
 */
static VALUE read_more(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlErrorPtr last_error;
  VALUE rb_errors;
  int status;

  Data_Get_Struct(self, xmlTextReader, reader);

  rb_errors = rb_funcall(self, rb_intern("errors"), 0);

  xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
  status = xmlTextReaderRead(reader);
  xmlSetStructuredErrorFunc(NULL, NULL);

  switch (status) {
    case 1:
      return self;
    case 0:
      return Qnil;
    default:
      break;
  }

  /* any other status is a failure: raise the last libxml2 error if there is one */
  last_error = xmlGetLastError();
  if (last_error == NULL) {
    rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
  } else {
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));
  }

  return Qnil;
}
/*
 * call-seq:
 *   inner_xml
 *
 * Read the contents of the current node, including child nodes and markup.
 * Returns a utf-8 encoded string, or +nil+ when nothing could be read.
 */
static VALUE inner_xml(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlChar *c_xml;
  VALUE rb_xml;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_xml = xmlTextReaderReadInnerXml(reader);
  if (c_xml == NULL) {
    return Qnil;
  }

  /* copy into a Ruby string, then release the libxml2 buffer */
  rb_xml = NOKOGIRI_STR_NEW2((char*)c_xml);
  xmlFree(c_xml);

  return rb_xml;
}
/*
 * call-seq:
 *   outer_xml
 *
 * Read the current node and its contents, including child nodes and markup.
 * Returns a utf-8 encoded string, or +nil+ when nothing could be read.
 */
static VALUE outer_xml(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlChar *c_xml;
  VALUE rb_xml;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_xml = xmlTextReaderReadOuterXml(reader);
  if (c_xml == NULL) {
    return Qnil;
  }

  /* copy into a Ruby string, then release the libxml2 buffer */
  rb_xml = NOKOGIRI_STR_NEW2((char*)c_xml);
  xmlFree(c_xml);

  return rb_xml;
}
/*
 * call-seq:
 *   from_memory(string, url = nil, encoding = nil, options = 0)
 *
 * Create a new reader that parses +string+.
 *
 * Raises ArgumentError when +string+ is nil or false and RuntimeError when
 * libxml2 cannot create the reader. +initialize+ is called with
 * (string, url, encoding).
 */
static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_buffer, rb_url, encoding, rb_options;
  xmlTextReaderPtr reader;
  const char * c_url      = NULL;
  const char * c_encoding = NULL;
  int c_options           = 0;
  VALUE rb_reader, args[3];

  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
  if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
  if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
  if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);

  /*
   * Coerce to a String before taking pointer and length: inside a single
   * argument list their evaluation order is unspecified, so the length
   * could otherwise be read from an object that is not yet a String.
   */
  StringValue(rb_buffer);

  reader = xmlReaderForMemory(
      StringValuePtr(rb_buffer),
      (int)RSTRING_LEN(rb_buffer),
      c_url,
      c_encoding,
      c_options
  );

  /* nothing to free here: the reader was never created */
  if(reader == NULL) {
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
  args[0] = rb_buffer;
  args[1] = rb_url;
  args[2] = encoding;
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}
/*
 * call-seq:
 *   from_io(io, url = nil, encoding = nil, options = 0)
 *
 * Create a new reader that parses +io+.
 *
 * Raises ArgumentError when +io+ is nil or false and RuntimeError when
 * libxml2 cannot create the reader. +initialize+ is called with
 * (io, url, encoding).
 */
static VALUE from_io(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_io, rb_url, encoding, rb_options;
  xmlTextReaderPtr reader;
  const char * c_url      = NULL;
  const char * c_encoding = NULL;
  int c_options           = 0;
  VALUE rb_reader, args[3];

  rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
  if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
  if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
  if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);

  /* the Ruby IO object itself is the context handed to the read/close callbacks */
  reader = xmlReaderForIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)rb_io,
      c_url,
      c_encoding,
      c_options
  );

  /* nothing to free here: the reader was never created */
  if(reader == NULL) {
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
  args[0] = rb_io;
  args[1] = rb_url;
  args[2] = encoding;
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}
/*
 * call-seq:
 *   reader.empty_element? # => true or false
 *
 * Returns true if the current node is empty, otherwise false.
 */
static VALUE empty_element_p(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  return xmlTextReaderIsEmptyElement(reader) ? Qtrue : Qfalse;
}
VALUE cNokogiriXmlReader;

/* Defines Nokogiri::XML::Reader with its constructors and instance methods. */
void init_xml_reader()
{
  VALUE nokogiri_mod = rb_define_module("Nokogiri");
  VALUE xml_mod = rb_define_module_under(nokogiri_mod, "XML");

  /*
   * The Reader parser allows you to effectively pull parse an XML document.
   * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
   * node. Note that you may only iterate over the document once!
   */
  VALUE reader_class = rb_define_class_under(xml_mod, "Reader", rb_cObject);

  cNokogiriXmlReader = reader_class;

  rb_define_singleton_method(reader_class, "from_memory", from_memory, -1);
  rb_define_singleton_method(reader_class, "from_io", from_io, -1);

  rb_define_method(reader_class, "read", read_more, 0);
  rb_define_method(reader_class, "inner_xml", inner_xml, 0);
  rb_define_method(reader_class, "outer_xml", outer_xml, 0);
  rb_define_method(reader_class, "state", state, 0);
  rb_define_method(reader_class, "node_type", node_type, 0);
  rb_define_method(reader_class, "name", name, 0);
  rb_define_method(reader_class, "local_name", local_name, 0);
  rb_define_method(reader_class, "namespace_uri", namespace_uri, 0);
  rb_define_method(reader_class, "prefix", prefix, 0);
  rb_define_method(reader_class, "value", value, 0);
  rb_define_method(reader_class, "lang", lang, 0);
  rb_define_method(reader_class, "xml_version", xml_version, 0);
  rb_define_method(reader_class, "depth", depth, 0);
  rb_define_method(reader_class, "attribute_count", attribute_count, 0);
  rb_define_method(reader_class, "attribute", reader_attribute, 1);
  rb_define_method(reader_class, "namespaces", namespaces, 0);
  rb_define_method(reader_class, "attribute_at", attribute_at, 1);
  rb_define_method(reader_class, "empty_element?", empty_element_p, 0);
  rb_define_method(reader_class, "attributes?", attributes_eh, 0);
  rb_define_method(reader_class, "value?", value_eh, 0);
  rb_define_method(reader_class, "default?", default_eh, 0);
  rb_define_method(reader_class, "base_uri", base_uri, 0);

  rb_define_private_method(reader_class, "attr_nodes", attribute_nodes, 0);
}
nokogiri-1.6.1/ext/nokogiri/xml_encoding_handler.c 0000644 0001750 0001750 00000003310 12261213762 021645 0 ustar boutil boutil #include
/*
 * call-seq: Nokogiri::EncodingHandler.[](name)
 *
 * Get the encoding handler for +name+, or +nil+ when none is found.
 */
static VALUE get(VALUE klass, VALUE key)
{
  xmlCharEncodingHandlerPtr c_handler =
    xmlFindCharEncodingHandler(StringValuePtr(key));

  if (c_handler == NULL) {
    return Qnil;
  }

  /* wrapped without mark or free functions */
  return Data_Wrap_Struct(klass, NULL, NULL, c_handler);
}
/*
 * call-seq: Nokogiri::EncodingHandler.delete(name)
 *
 * Delete the encoding alias named +name+. Returns +true+ when libxml2
 * reports 0 and +nil+ otherwise.
 */
static VALUE delete(VALUE klass, VALUE name)
{
  int status = xmlDelEncodingAlias(StringValuePtr(name));

  return (status == 0) ? Qtrue : Qnil;
}
/*
* call-seq: Nokogiri::EncodingHandler.alias(from, to)
*
* Alias encoding handler with name +from+ to name +to+
*/
static VALUE alias(VALUE klass, VALUE from, VALUE to)
{
  /* Convert both Ruby strings to C strings, then register the alias. */
  char *from_cstr = StringValuePtr(from);
  char *to_cstr = StringValuePtr(to);

  xmlAddEncodingAlias(from_cstr, to_cstr);

  /* The second argument is handed back to the caller. */
  return to;
}
/*
* call-seq: Nokogiri::EncodingHandler.clear_aliases!
*
* Remove all encoding aliases.
*/
static VALUE clear_aliases(VALUE klass)
{
  /* The receiver is returned once the alias table has been cleaned up. */
  VALUE receiver = klass;

  xmlCleanupEncodingAliases();

  return receiver;
}
/*
* call-seq: name
*
* Get the name of this EncodingHandler
*/
static VALUE name(VALUE self)
{
  xmlCharEncodingHandlerPtr wrapped;
  const char *handler_name;

  /* Unwrap the C handler stored inside the Ruby object. */
  Data_Get_Struct(self, xmlCharEncodingHandler, wrapped);
  handler_name = wrapped->name;

  return NOKOGIRI_STR_NEW2(handler_name);
}
void init_xml_encoding_handler()
{
  /* Nokogiri::EncodingHandler, a plain Object subclass. */
  VALUE nokogiri_module = rb_define_module("Nokogiri");
  VALUE handler_class =
    rb_define_class_under(nokogiri_module, "EncodingHandler", rb_cObject);

  /* Class-level API. */
  rb_define_singleton_method(handler_class, "[]", get, 1);
  rb_define_singleton_method(handler_class, "delete", delete, 1);
  rb_define_singleton_method(handler_class, "alias", alias, 2);
  rb_define_singleton_method(handler_class, "clear_aliases!", clear_aliases, 0);

  /* Instance-level API. */
  rb_define_method(handler_class, "name", name, 0);
}
nokogiri-1.6.1/ext/nokogiri/xml_cdata.h 0000644 0001750 0001750 00000000215 12261213762 017444 0 ustar boutil boutil #ifndef NOKOGIRI_XML_CDATA
#define NOKOGIRI_XML_CDATA
#include
void init_xml_cdata();
extern VALUE cNokogiriXmlCData;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_encoding_handler.h 0000644 0001750 0001750 00000000216 12261213762 021654 0 ustar boutil boutil #ifndef NOKOGIRI_XML_ENCODING_HANDLER
#define NOKOGIRI_XML_ENCODING_HANDLER
#include
void init_xml_encoding_handler();
#endif
nokogiri-1.6.1/ext/nokogiri/extconf.rb 0000644 0001750 0001750 00000014010 12261213762 017330 0 ustar boutil boutil ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/
# :stopdoc:
require 'mkmf'
# Let a CC environment variable override the compiler recorded in RbConfig.
RbConfig::MAKEFILE_CONFIG['CC'] = ENV['CC'] if ENV['CC']
# ROOT is two directory levels above the directory holding this file.
ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
# Library and header directories reported by the running Ruby's configuration.
LIBDIR = RbConfig::CONFIG['libdir']
@libdir_basename = "lib" # shrug, ruby 2.0 won't work for me.
INCLUDEDIR = RbConfig::CONFIG['includedir']
# On MacRuby, strip "-static" from the static libruby argument in place.
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'macruby'
$LIBRUBYARG_STATIC.gsub!(/-static/, '')
end
# Append any CFLAGS / LIBS supplied through the environment.
# (When a variable is unset the interpolation contributes only the leading space.)
$CFLAGS << " #{ENV["CFLAGS"]}"
$LIBS << " #{ENV["LIBS"]}"
# Truthy when the target OS is mingw32 or matches /mswin/.
windows_p = RbConfig::CONFIG['target_os'] == 'mingw32' || RbConfig::CONFIG['target_os'] =~ /mswin/
# Platform defines: Windows and Solaris builds use the bundled vasprintf;
# every other target gets debug symbols and XP_UNIX.
if windows_p
$CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
elsif RbConfig::CONFIG['target_os'] =~ /solaris/
$CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
else
$CFLAGS << " -g -DXP_UNIX"
end
# Extra define and zlib linkage when the compiler name contains "mingw".
if RbConfig::MAKEFILE_CONFIG['CC'] =~ /mingw/
$CFLAGS << " -DIN_LIBXML"
$LIBS << " -lz" # TODO why is this necessary?
end
# For compilers whose name contains "gcc": add -O3 only when no -O<digit> flag
# is already present, then enable the warning set below.
if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
$CFLAGS << " -O3" unless $CFLAGS[/-O\d/]
$CFLAGS << " -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
end
# Decide where headers and libraries come from. Each branch defines the three
# constants consumed further down: HEADER_DIRS, LIB_DIRS and XML2_HEADER_DIRS.
#   * Windows targets: the Ruby installation's own include/lib directories.
#   * NOKOGIRI_USE_SYSTEM_LIBRARIES set: a fixed list of system locations,
#     optionally prefixed with Homebrew's libxml2.
#   * otherwise: libxml2 and libxslt are built through mini_portile recipes.
if windows_p
# I'm cross compiling!
HEADER_DIRS = [INCLUDEDIR]
LIB_DIRS = [LIBDIR]
XML2_HEADER_DIRS = [File.join(INCLUDEDIR, "libxml2"), INCLUDEDIR]
else
if ENV['NOKOGIRI_USE_SYSTEM_LIBRARIES']
HEADER_DIRS = [
# First search /opt/local for macports
'/opt/local/include',
# Then search /usr/local for people that installed from source
'/usr/local/include',
# Check the ruby install locations
INCLUDEDIR,
# Finally fall back to /usr
'/usr/include',
'/usr/include/libxml2',
]
LIB_DIRS = [
# First search /opt/local for macports
'/opt/local/lib',
# Then search /usr/local for people that installed from source
'/usr/local/lib',
# Check the ruby install locations
LIBDIR,
# Finally fall back to /usr
'/usr/lib',
]
# libxml2-specific include directories are tried before the generic ones.
XML2_HEADER_DIRS = [
'/opt/local/include/libxml2',
'/usr/local/include/libxml2',
File.join(INCLUDEDIR, "libxml2")
] + HEADER_DIRS
# If the user has homebrew installed, use the libxml2 inside homebrew
# (stderr is discarded, so a missing `brew` simply yields an empty prefix).
brew_prefix = `brew --prefix libxml2 2> /dev/null`.chomp
unless brew_prefix.empty?
LIB_DIRS.unshift File.join(brew_prefix, 'lib')
XML2_HEADER_DIRS.unshift File.join(brew_prefix, 'include/libxml2')
end
else
require 'mini_portile'
require 'yaml'
# Shared recipe steps: install under ROOT/ports, fetch the tarball named after
# the recipe, cook only when no ".installed" checkpoint file exists yet, then
# activate the recipe.
# NOTE(review): the download URL is built from the xmlsoft.org FTP path for
# both libxml2 and libxslt - confirm that is intended for libxslt as well.
common_recipe = lambda do |recipe|
recipe.target = File.join(ROOT, "ports")
recipe.files = ["ftp://ftp.xmlsoft.org/libxml2/#{recipe.name}-#{recipe.version}.tar.gz"]
checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{recipe.host}.installed"
unless File.exist?(checkpoint)
recipe.cook
FileUtils.touch checkpoint
end
recipe.activate
end
# Versions of the bundled libraries are read from ROOT/dependencies.yml.
dependencies = YAML.load_file(File.join(ROOT, "dependencies.yml"))
libxml2_recipe = MiniPortile.new("libxml2", dependencies["libxml2"]).tap do |recipe|
recipe.configure_options = [
"--enable-shared",
"--disable-static",
"--without-python",
"--without-readline",
"--with-c14n",
"--with-debug",
"--with-threads"
]
common_recipe.call recipe
end
# libxslt is configured against the libxml2 that was just set up above.
libxslt_recipe = MiniPortile.new("libxslt", dependencies["libxslt"]).tap do |recipe|
recipe.configure_options = [
"--enable-shared",
"--disable-static",
"--without-python",
"--without-crypto",
"--with-debug",
"--with-libxml-prefix=#{libxml2_recipe.path}"
]
common_recipe.call recipe
end
# Add rpath linker flags for both recipe lib directories, and pass the recipe
# paths to the C code as preprocessor string defines.
$LDFLAGS << " -Wl,-rpath,#{libxml2_recipe.path}/lib"
$LDFLAGS << " -Wl,-rpath,#{libxslt_recipe.path}/lib"
$CFLAGS << " -DNOKOGIRI_USE_PACKAGED_LIBRARIES -DNOKOGIRI_LIBXML2_PATH='\"#{libxml2_recipe.path}\"' -DNOKOGIRI_LIBXSLT_PATH='\"#{libxslt_recipe.path}\"'"
HEADER_DIRS = [libxml2_recipe, libxslt_recipe].map { |f| File.join(f.path, "include") }
LIB_DIRS = [libxml2_recipe, libxslt_recipe].map { |f| File.join(f.path, "lib") }
XML2_HEADER_DIRS = HEADER_DIRS + [File.join(libxml2_recipe.path, "include", "libxml2")]
end
end
# Call dir_config once per native dependency, in the original order.
# Only xml2 uses the libxml2-specific header directory list.
[
  ['zlib',  HEADER_DIRS],
  ['iconv', HEADER_DIRS],
  ['xml2',  XML2_HEADER_DIRS],
  ['xslt',  HEADER_DIRS]
].each do |library, header_dirs|
  dir_config(library, header_dirs, LIB_DIRS)
end
# Abort the build with a banner that names the missing +lib+ and points at the
# nokogiri installation tutorial.
def asplode(lib)
  banner = "-----\n#{lib} is missing. please visit http://nokogiri.org/tutorials/installing_nokogiri.html for help with installing dependencies.\n-----"
  abort banner
end
# Call pkg_config for each package name, in the original order.
%w[libxslt libxml-2.0 libiconv].each do |package|
  pkg_config(package)
end
# True as soon as one of the two iconv entry points is found. For each symbol
# the probes run in this order and stop at the first success:
# have_func, then have_library, then find_library.
def have_iconv?
  %w[iconv_open libiconv_open].any? do |symbol|
    have_func(symbol, 'iconv.h') ||
      have_library('iconv', symbol, 'iconv.h') ||
      find_library('iconv', symbol, 'iconv.h')
  end
end
# Required headers and libraries: abort with installation guidance as soon as
# one of them cannot be located.
asplode "libxml2" unless find_header('libxml/parser.h')
asplode "libxslt" unless find_header('libxslt/xslt.h')
asplode "libexslt" unless find_header('libexslt/exslt.h')
asplode "libiconv" unless have_iconv?
asplode "libxml2" unless find_library("xml2", 'xmlParseDoc')
asplode "libxslt" unless find_library("xslt", 'xsltParseStylesheetDoc')
asplode "libexslt" unless find_library("exslt", 'exsltFuncRegister')
# xmlHasFeature is treated as the minimum-version marker for libxml2.
unless have_func('xmlHasFeature')
abort "-----\nThe function 'xmlHasFeature' is missing from your installation of libxml2. Likely this means that your installed version of libxml2 is old enough that nokogiri will not work well. To get around this problem, please upgrade your installation of libxml2.
Please visit http://nokogiri.org/tutorials/installing_nokogiri.html for more help!"
end
# Optional libxml2 functions: each symbol is probed exactly once and the result
# is not acted on here. (The original probed
# xmlRelaxNGSetParserStructuredErrors twice; the redundant probe is dropped.)
have_func 'xmlFirstElementChild'
have_func('xmlRelaxNGSetParserStructuredErrors')
have_func('xmlRelaxNGSetValidStructuredErrors')
have_func('xmlSchemaSetValidStructuredErrors')
have_func('xmlSchemaSetParserStructuredErrors')
# When CPUPROFILE is set, the profiler library becomes a hard requirement.
if ENV['CPUPROFILE']
unless find_library('profiler', 'ProfilerEnable', *LIB_DIRS)
abort "google performance tools are not installed"
end
end
create_makefile('nokogiri/nokogiri')
# :startdoc:
nokogiri-1.6.1/ext/nokogiri/nokogiri.h 0000644 0001750 0001750 00000007412 12261213762 017337 0 ustar boutil boutil #ifndef NOKOGIRI_NATIVE
#define NOKOGIRI_NATIVE
#include
#include
#include
#include
#ifdef USE_INCLUDED_VASPRINTF
int vasprintf (char **strp, const char *fmt, va_list ap);
#else
#define _GNU_SOURCE
# include
#undef _GNU_SOURCE
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef HAVE_RUBY_ENCODING_H
#include
#else
#include
#endif
/*
 * MAYBE_UNUSED(name) / UNUSED(name): with GCC-compatible compilers the name is
 * tagged with __attribute__((unused)); UNUSED additionally renames it by
 * prepending "UNUSED_". Other compilers get the bare name.
 */
#ifndef UNUSED
# if defined(__GNUC__)
# define MAYBE_UNUSED(name) name __attribute__((unused))
# define UNUSED(name) MAYBE_UNUSED(UNUSED_ ## name)
# else
# define MAYBE_UNUSED(name) name
# define UNUSED(name) name
# endif
#endif
/*
 * NORETURN(name): prefixes the declaration with __attribute__((noreturn))
 * under GCC-compatible compilers and is a no-op elsewhere.
 */
#ifndef NORETURN
# if defined(__GNUC__)
# define NORETURN(name) __attribute__((noreturn)) name
# else
# define NORETURN(name) name
# endif
#endif
/*
 * String construction helpers.
 *   NOKOGIRI_STR_NEW(str, len) builds a Ruby string from len bytes of str.
 *   NOKOGIRI_STR_NEW2(str)     does the same for a NUL-terminated C string.
 * When ruby/encoding support is available the string is created through
 * rb_external_str_new_with_enc() with the UTF-8 encoding; otherwise plain
 * rb_str_new()/rb_str_new2() are used.
 * Note: in the encoding-aware variant NOKOGIRI_STR_NEW2 expands its argument
 * twice, so pass an expression without side effects.
 */
#ifdef HAVE_RUBY_ENCODING_H
#include
#define NOKOGIRI_STR_NEW2(str) \
NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
#define NOKOGIRI_STR_NEW(str, len) \
rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
#else
#define NOKOGIRI_STR_NEW2(str) \
rb_str_new2((const char *)(str))
#define NOKOGIRI_STR_NEW(str, len) \
rb_str_new((const char *)(str), (long)(len))
#endif
/* RBSTR_OR_QNIL(_str): a Ruby string for a non-NULL C string, Qnil for NULL. */
#define RBSTR_OR_QNIL(_str) \
(_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
extern VALUE mNokogiri ;
extern VALUE mNokogiriXml ;
extern VALUE mNokogiriXmlSax ;
extern VALUE mNokogiriHtml ;
extern VALUE mNokogiriHtmlSax ;
extern VALUE mNokogiriXslt ;
void nokogiri_root_node(xmlNodePtr);
void nokogiri_root_nsdef(xmlNsPtr, xmlDocPtr);
/*
 * Deallocation tracing hooks, active only in DEBUG builds.
 *   NOKOGIRI_DEBUG_START(p): if the NOKOGIRI_NO_FREE environment variable is
 *     set, executes `return ;` in the *enclosing* function (so it is only
 *     usable inside void functions and skips whatever follows it); if
 *     NOKOGIRI_DEBUG is set, logs file, line and pointer to stderr.
 *   NOKOGIRI_DEBUG_END(p): logs the matching "end" line when NOKOGIRI_DEBUG
 *     is set.
 * Both expand to bare `if` statements, not a do { } while (0) block.
 * Outside DEBUG builds both macros expand to nothing.
 */
#ifdef DEBUG
#define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_NO_FREE")) return ; if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p);
#define NOKOGIRI_DEBUG_END(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p end\n", __FILE__, __LINE__, p);
#else
#define NOKOGIRI_DEBUG_START(p)
#define NOKOGIRI_DEBUG_END(p)
#endif
/*
 * Fallback accessors for Ruby headers that do not provide
 * RSTRING_PTR / RSTRING_LEN / RARRAY_PTR / RARRAY_LEN: they read the struct
 * fields directly.
 */
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#ifndef RARRAY_PTR
#define RARRAY_PTR(a) RARRAY(a)->ptr
#endif
#ifndef RARRAY_LEN
#define RARRAY_LEN(a) RARRAY(a)->len
#endif
/*
 * Under GCC-compatible compilers, wrap __builtin_expect so both arguments are
 * cast to long. Nothing is defined for other compilers here.
 */
#ifndef __builtin_expect
# if defined(__GNUC__)
# define __builtin_expect(expr, c) __builtin_expect((long)(expr), (long)(c))
# endif
#endif
/* Namespace-declaration prefix and related buffer sizes. */
#define XMLNS_PREFIX "xmlns"
#define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
#define XMLNS_BUFFER_LEN 128
#endif
nokogiri-1.6.1/ext/nokogiri/xml_node_set.c 0000644 0001750 0001750 00000030713 12261213762 020171 0 ustar boutil boutil #include
#include
static ID decorate ;
static int dealloc_namespace(xmlNsPtr ns)
{
  /* Release the strings first, then the struct that pointed at them. */
  if (ns->href != NULL) {
    xmlFree((xmlChar *)ns->href);
  }
  if (ns->prefix != NULL) {
    xmlFree((xmlChar *)ns->prefix);
  }
  xmlFree(ns);

  /* Tell st_foreach to keep iterating. */
  return ST_CONTINUE;
}
static void deallocate(nokogiriNodeSetTuple *tuple)
{
  /*
   *  xmlXPathFreeNodeSet() contains an implicit assumption that it is being
   *  called before any of its pointed-to nodes have been free()d. this
   *  assumption lies in the operation where it dereferences nodeTab pointers
   *  while searching for namespace nodes to free.
   *
   *  however, since Ruby's GC mechanism cannot guarantee the strict order in
   *  which ruby objects will be GC'd, nodes may be garbage collected before a
   *  nodeset containing pointers to those nodes. (this is true regardless of
   *  how we declare dependencies between objects with rb_gc_mark().)
   *
   *  as a result, xmlXPathFreeNodeSet() will perform unsafe memory operations,
   *  and calling it would be evil.
   *
   *  so here, we *manually* free the set of namespace nodes that was
   *  constructed at initialization time (see Nokogiri_wrap_xml_node_set()), as
   *  well as the NodeSet, without using the official xmlXPathFreeNodeSet().
   *
   *  there's probably a lesson in here somewhere about intermingling, within a
   *  single array, structs with different memory-ownership semantics. or more
   *  generally, a lesson about building an API in C/C++ that does not contain
   *  assumptions about the strict order in which memory will be released. hey,
   *  that sounds like a great idea for a blog post! get to it!
   *
   *  "In Valgrind We Trust." seriously.
   */
  xmlNodeSetPtr node_set = tuple->node_set;

  NOKOGIRI_DEBUG_START(node_set) ;

  /*
   * The namespaces table and the tuple are allocated for every wrapper, even
   * when node_set is NULL. They are therefore released unconditionally; the
   * previous early return on a NULL node_set leaked both.
   */
  if (tuple->namespaces) {
    st_foreach(tuple->namespaces, dealloc_namespace, 0);
    st_free_table(tuple->namespaces);
  }

  if (node_set) {
    if (node_set->nodeTab != NULL)
      xmlFree(node_set->nodeTab);
    xmlFree(node_set);
  }

  free(tuple);

  /* node_set is only printed as a pointer value here, never dereferenced. */
  NOKOGIRI_DEBUG_END(node_set) ;
}
static VALUE allocate(VALUE klass)
{
  /* A fresh, empty libxml2 node set wrapped without an owning document. */
  xmlNodeSetPtr empty_set = xmlXPathNodeSetCreate(NULL);

  return Nokogiri_wrap_xml_node_set(empty_set, Qnil);
}
/*
* call-seq:
* dup
*
* Duplicate this node set
*/
static VALUE duplicate(VALUE self)
{
  nokogiriNodeSetTuple *source;
  xmlNodeSetPtr copy;

  Data_Get_Struct(self, nokogiriNodeSetTuple, source);

  /* Merging into a NULL target produces a new set with the same members. */
  copy = xmlXPathNodeSetMerge(NULL, source->node_set);

  /* The copy is attached to the same @document as the receiver. */
  return Nokogiri_wrap_xml_node_set(copy, rb_iv_get(self, "@document"));
}
/*
* call-seq:
* length
*
* Get the length of the node set
*/
static VALUE length(VALUE self)
{
  nokogiriNodeSetTuple *tuple;
  int count = 0;

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);

  /* A wrapper without an underlying set reports zero members. */
  if (tuple->node_set) {
    count = tuple->node_set->nodeNr;
  }

  return INT2NUM(count);
}
/*
* call-seq:
* push(node)
*
* Append +node+ to the NodeSet.
*/
static VALUE push(VALUE self, VALUE rb_node)
{
  nokogiriNodeSetTuple *tuple;
  xmlNodePtr node;

  /* Only Node and Namespace wrappers may be stored in a set. */
  if (!rb_obj_is_kind_of(rb_node, cNokogiriXmlNode) &&
      !rb_obj_is_kind_of(rb_node, cNokogiriXmlNamespace)) {
    rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node or Nokogiri::XML::Namespace");
  }

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);
  Data_Get_Struct(rb_node, xmlNode, node);

  xmlXPathNodeSetAdd(tuple->node_set, node);

  return self;
}
/*
* call-seq:
* delete(node)
*
* Delete +node+ from the Nodeset, if it is a member. Returns the deleted node
* if found, otherwise returns nil.
*/
static VALUE
delete(VALUE self, VALUE rb_node)
{
  nokogiriNodeSetTuple *tuple;
  xmlNodePtr node;
  xmlNodeSetPtr cur;
  int i;

  if (!(rb_obj_is_kind_of(rb_node, cNokogiriXmlNode) || rb_obj_is_kind_of(rb_node, cNokogiriXmlNamespace)))
    rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node or Nokogiri::XML::Namespace");

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);
  Data_Get_Struct(rb_node, xmlNode, node);
  cur = tuple->node_set;

  if (!xmlXPathNodeSetContains(cur, node))
    return Qnil ;

  /* Locate the entry by pointer identity. */
  for (i = 0; i < cur->nodeNr; i++)
    if (cur->nodeTab[i] == node) break;

  /*
   * xmlXPathNodeSetContains() compares namespace nodes structurally, so it can
   * report a match even though this exact pointer is not stored in nodeTab.
   * In that case nothing can be removed; without this guard the code below
   * would silently drop the *last* entry of the set instead.
   */
  if (i >= cur->nodeNr)
    return Qnil ;

  /* Close the gap by shifting the tail one slot to the left. */
  cur->nodeNr--;
  for (; i < cur->nodeNr; i++)
    cur->nodeTab[i] = cur->nodeTab[i + 1];
  cur->nodeTab[cur->nodeNr] = NULL;

  return rb_node;
}
/*
* call-seq:
* &(node_set)
*
* Set Intersection — Returns a new NodeSet containing nodes common to the two NodeSets.
*/
static VALUE intersection(VALUE self, VALUE rb_other)
{
  nokogiriNodeSetTuple *left, *right;
  xmlNodeSetPtr common;

  /* The operand has to be another NodeSet. */
  if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) {
    rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
  }

  Data_Get_Struct(self, nokogiriNodeSetTuple, left);
  Data_Get_Struct(rb_other, nokogiriNodeSetTuple, right);

  common = xmlXPathIntersection(left->node_set, right->node_set);

  /* The result is attached to the receiver's @document. */
  return Nokogiri_wrap_xml_node_set(common, rb_iv_get(self, "@document"));
}
/*
* call-seq:
* include?(node)
*
* Returns true if any member of node set equals +node+.
*/
static VALUE include_eh(VALUE self, VALUE rb_node)
{
  nokogiriNodeSetTuple *tuple;
  xmlNodePtr candidate;

  /* Only Node and Namespace wrappers can be looked up. */
  if (!rb_obj_is_kind_of(rb_node, cNokogiriXmlNode) &&
      !rb_obj_is_kind_of(rb_node, cNokogiriXmlNamespace)) {
    rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node or Nokogiri::XML::Namespace");
  }

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);
  Data_Get_Struct(rb_node, xmlNode, candidate);

  if (xmlXPathNodeSetContains(tuple->node_set, candidate)) {
    return Qtrue;
  }
  return Qfalse;
}
/*
* call-seq:
* |(node_set)
*
* Returns a new set built by merging the set and the elements of the given
* set.
*/
static VALUE set_union(VALUE self, VALUE rb_other)
{
  nokogiriNodeSetTuple *left, *right;
  xmlNodeSetPtr merged;

  /* The operand has to be another NodeSet. */
  if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) {
    rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
  }

  Data_Get_Struct(self, nokogiriNodeSetTuple, left);
  Data_Get_Struct(rb_other, nokogiriNodeSetTuple, right);

  /* Start from a copy of the receiver, then merge the operand into it. */
  merged = xmlXPathNodeSetMerge(NULL, left->node_set);
  merged = xmlXPathNodeSetMerge(merged, right->node_set);

  return Nokogiri_wrap_xml_node_set(merged, rb_iv_get(self, "@document"));
}
/*
* call-seq:
* -(node_set)
*
* Difference - returns a new NodeSet that is a copy of this NodeSet, removing
* each item that also appears in +node_set+
*/
static VALUE minus(VALUE self, VALUE rb_other)
{
  nokogiriNodeSetTuple *left, *right;
  xmlNodeSetPtr remaining;
  int idx = 0;

  /* The operand has to be another NodeSet. */
  if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) {
    rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
  }

  Data_Get_Struct(self, nokogiriNodeSetTuple, left);
  Data_Get_Struct(rb_other, nokogiriNodeSetTuple, right);

  /* Work on a copy of the receiver and strip every member of the operand. */
  remaining = xmlXPathNodeSetMerge(NULL, left->node_set);
  while (idx < right->node_set->nodeNr) {
    xmlXPathNodeSetDel(remaining, right->node_set->nodeTab[idx]);
    ++idx;
  }

  return Nokogiri_wrap_xml_node_set(remaining, rb_iv_get(self, "@document"));
}
/*
 * Return the wrapped member at +offset+, or Qnil when the offset is out of
 * range. Negative offsets count back from the end (-1 is the last member).
 */
static VALUE index_at(VALUE self, long offset)
{
  xmlNodeSetPtr node_set;
  nokogiriNodeSetTuple *tuple;
  long count;

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);
  node_set = tuple->node_set;
  count = (long)node_set->nodeNr;

  /*
   * Bounds are checked on the full long value. The previous
   * abs((int)offset) test truncated the offset, so a large negative index
   * (e.g. a multiple of 2^32) passed the check and indexed nodeTab out of
   * bounds.
   */
  if (offset >= count || offset < -count)
    return Qnil;

  if (offset < 0)
    offset += count;

  /* Namespace declarations need the namespace wrapper, not the node wrapper. */
  if (XML_NAMESPACE_DECL == node_set->nodeTab[offset]->type)
    return Nokogiri_wrap_xml_namespace2(rb_iv_get(self, "@document"), (xmlNsPtr)(node_set->nodeTab[offset]));

  return Nokogiri_wrap_xml_node(Qnil, node_set->nodeTab[offset]);
}
/*
 * Return a new NodeSet holding up to +len+ members starting at +beg+, or Qnil
 * when +beg+ lies past the end or either argument is negative. A +len+ that
 * reaches past the end is clamped to the members that remain.
 */
static VALUE subseq(VALUE self, long beg, long len)
{
  long j;
  long count;
  nokogiriNodeSetTuple *tuple;
  xmlNodeSetPtr node_set;
  xmlNodeSetPtr new_set ;

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);
  node_set = tuple->node_set;
  count = (long)node_set->nodeNr;

  if (beg > count) return Qnil ;
  if (beg < 0 || len < 0) return Qnil ;

  /*
   * Clamp by comparing len with the remaining room. The previous
   * (beg + len) > nodeNr test could overflow a signed long for very large
   * lengths; count - beg cannot, because 0 <= beg <= count here.
   */
  if (len > count - beg) {
    len = count - beg ;
  }

  new_set = xmlXPathNodeSetCreate(NULL);
  for (j = beg ; j < beg + len ; ++j) {
    xmlXPathNodeSetAddUnique(new_set, node_set->nodeTab[j]);
  }

  return Nokogiri_wrap_xml_node_set(new_set, rb_iv_get(self, "@document"));
}
/*
* call-seq:
* [index] -> Node or nil
* [start, length] -> NodeSet or nil
* [range] -> NodeSet or nil
* slice(index) -> Node or nil
* slice(start, length) -> NodeSet or nil
* slice(range) -> NodeSet or nil
*
* Element reference - returns the node at +index+, or returns a NodeSet
* containing nodes starting at +start+ and continuing for +length+ elements, or
* returns a NodeSet containing nodes specified by +range+. Negative +indices+
* count backward from the end of the +node_set+ (-1 is the last node). Returns
* nil if the +index+ (or +start+) are out of range.
*/
static VALUE slice(int argc, VALUE *argv, VALUE self)
{
  VALUE selector;
  VALUE range_status;
  long start, count;
  xmlNodeSetPtr node_set;
  nokogiriNodeSetTuple *tuple;

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);
  node_set = tuple->node_set;

  /* Form [start, length]: a negative start counts from the end. */
  if (argc == 2) {
    start = NUM2LONG(argv[0]);
    count = NUM2LONG(argv[1]);
    if (start < 0) {
      start += node_set->nodeNr;
    }
    return subseq(self, start, count);
  }

  /* Any argument count other than one or two is rejected by rb_scan_args. */
  if (argc != 1) {
    rb_scan_args(argc, argv, "11", NULL, NULL);
  }

  selector = argv[0];

  /* Form [index] with a Fixnum. */
  if (FIXNUM_P(selector)) {
    return index_at(self, FIX2LONG(selector));
  }

  /* Form [range]: Qfalse means "not a Range", Qnil means "out of range". */
  range_status = rb_range_beg_len(selector, &start, &count, (long)node_set->nodeNr, 0);
  if (range_status == Qnil) {
    return Qnil;
  }
  if (range_status != Qfalse) {
    return subseq(self, start, count);
  }

  /* Anything else is converted to an integer index. */
  return index_at(self, NUM2LONG(selector));
}
/*
* call-seq:
* to_a
*
* Return this list as an Array
*/
/*
 * rb_node is never read; the signature is kept as it was (the method is
 * registered with arity 0).
 */
static VALUE to_array(VALUE self, VALUE rb_node)
{
  xmlNodeSetPtr set;
  VALUE list;
  int i;
  nokogiriNodeSetTuple *tuple;

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);
  set = tuple->node_set;

  /*
   * Build the Ruby array directly instead of staging the wrappers in a
   * calloc()ed buffer. The buffer was never freed (its free() call was
   * commented out), leaking memory on every call, and the calloc() result was
   * not checked. Pushing into a Ruby array also keeps every wrapper reachable
   * while the remaining members are wrapped.
   */
  list = rb_ary_new2((long)set->nodeNr);

  for (i = 0; i < set->nodeNr; i++) {
    VALUE elt;

    /* Namespace declarations need the namespace wrapper. */
    if (XML_NAMESPACE_DECL == set->nodeTab[i]->type)
      elt = Nokogiri_wrap_xml_namespace2(rb_iv_get(self, "@document"), (xmlNsPtr)(set->nodeTab[i]));
    else
      elt = Nokogiri_wrap_xml_node(Qnil, set->nodeTab[i]);

    rb_ary_push(list, elt);
  }

  return list;
}
/*
* call-seq:
* unlink
*
* Unlink this NodeSet and all Node objects it contains from their current context.
*/
static VALUE unlink_nodeset(VALUE self)
{
  nokogiriNodeSetTuple *tuple;
  xmlNodeSetPtr node_set;
  int idx, total;

  Data_Get_Struct(self, nokogiriNodeSetTuple, tuple);
  node_set = tuple->node_set;

  /* Snapshot the member count before any Ruby code runs. */
  total = node_set->nodeNr;

  for (idx = 0; idx < total; idx++) {
    VALUE wrapper;
    xmlNodePtr refreshed;

    /* Namespace declarations are left untouched. */
    if (XML_NAMESPACE_DECL == node_set->nodeTab[idx]->type) {
      continue;
    }

    wrapper = Nokogiri_wrap_xml_node(Qnil, node_set->nodeTab[idx]);
    rb_funcall(wrapper, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */

    /* Re-read the pointer held by the wrapper and store it back in the set. */
    Data_Get_Struct(wrapper, xmlNode, refreshed);
    node_set->nodeTab[idx] = refreshed;
  }

  return self;
}
VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document)
{
  VALUE wrapper;
  nokogiriNodeSetTuple *tuple;
  int idx;

  /* The tuple pairs the libxml2 set with a table of namespace nodes to free. */
  wrapper = Data_Make_Struct(cNokogiriXmlNodeSet, nokogiriNodeSetTuple, 0,
                             deallocate, tuple);
  tuple->node_set = node_set;
  tuple->namespaces = st_init_numtable();

  /* Remember the owning document and let it decorate the new set. */
  if (!NIL_P(document)) {
    rb_iv_set(wrapper, "@document", document);
    rb_funcall(document, decorate, 1, wrapper);
  }

  /* Nothing to record when there is no set or no member table. */
  if (node_set == NULL || node_set->nodeTab == NULL) {
    return wrapper;
  }

  for (idx = 0; idx < node_set->nodeNr; idx++) {
    xmlNodePtr member = node_set->nodeTab[idx];
    xmlNsPtr ns;

    if (member == NULL || member->type != XML_NAMESPACE_DECL) {
      continue;
    }

    /* Record namespace nodes whose `next` is set and is not itself a namespace declaration. */
    ns = (xmlNsPtr)member;
    if (ns->next && ns->next->type != XML_NAMESPACE_DECL) {
      st_insert(tuple->namespaces, (st_data_t)member, (st_data_t)0);
    }
  }

  return wrapper;
}
VALUE cNokogiriXmlNodeSet ;
void init_xml_node_set(void)
{
  /* Nokogiri::XML::NodeSet, a plain Object subclass. */
  VALUE nokogiri_module = rb_define_module("Nokogiri");
  VALUE xml_module = rb_define_module_under(nokogiri_module, "XML");
  VALUE node_set_class = rb_define_class_under(xml_module, "NodeSet", rb_cObject);

  cNokogiriXmlNodeSet = node_set_class;

  rb_define_alloc_func(node_set_class, allocate);

  /* Public instance methods; [] and slice share one implementation. */
  rb_define_method(node_set_class, "length", length, 0);
  rb_define_method(node_set_class, "[]", slice, -1);
  rb_define_method(node_set_class, "slice", slice, -1);
  rb_define_method(node_set_class, "push", push, 1);
  rb_define_method(node_set_class, "|", set_union, 1);
  rb_define_method(node_set_class, "-", minus, 1);
  rb_define_method(node_set_class, "unlink", unlink_nodeset, 0);
  rb_define_method(node_set_class, "to_a", to_array, 0);
  rb_define_method(node_set_class, "dup", duplicate, 0);
  rb_define_method(node_set_class, "delete", delete, 1);
  rb_define_method(node_set_class, "&", intersection, 1);
  rb_define_method(node_set_class, "include?", include_eh, 1);

  /* Cache the method ID used when wrapping sets. */
  decorate = rb_intern("decorate");
}
nokogiri-1.6.1/ext/nokogiri/xml_sax_push_parser.h 0000644 0001750 0001750 00000000264 12261213762 021602 0 ustar boutil boutil #ifndef NOKOGIRI_XML_SAX_PUSH_PARSER
#define NOKOGIRI_XML_SAX_PUSH_PARSER
#include
void init_xml_sax_push_parser();
extern VALUE cNokogiriXmlSaxPushParser ;
#endif
nokogiri-1.6.1/ext/nokogiri/html_entity_lookup.h 0000644 0001750 0001750 00000000210 12261213762 021434 0 ustar boutil boutil #ifndef NOKOGIRI_HTML_ENTITY_LOOKUP
#define NOKOGIRI_HTML_ENTITY_LOOKUP
#include
void init_html_entity_lookup();
#endif
nokogiri-1.6.1/ext/nokogiri/xml_node.c 0000644 0001750 0001750 00000112051 12261213762 017312 0 ustar boutil boutil #include
static ID decorate, decorate_bang;
/*
 * debug_node_dealloc: in DEBUG builds, a function whose body consists solely
 * of the NOKOGIRI_DEBUG_START / NOKOGIRI_DEBUG_END tracing macros. In other
 * builds the name is defined as 0.
 */
#ifdef DEBUG
static void debug_node_dealloc(xmlNodePtr x)
{
NOKOGIRI_DEBUG_START(x)
NOKOGIRI_DEBUG_END(x)
}
#else
# define debug_node_dealloc 0
#endif
static void mark(xmlNodePtr node)
{
  /* Mark the Ruby document object associated with this node's document. */
  xmlDocPtr owner = node->doc;

  rb_gc_mark(DOC_RUBY_OBJECT(owner));
}
/* :nodoc: */
typedef xmlNodePtr (*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
/* :nodoc: */
/*
 * Re-resolve namespace bindings on a node that has just been reparented, then
 * recurse into its children and attributes. Only element and attribute nodes
 * are processed.
 */
static void relink_namespace(xmlNodePtr reparented)
{
  xmlNodePtr child;

  if (reparented->type != XML_ATTRIBUTE_NODE &&
      reparented->type != XML_ELEMENT_NODE) return;

  if (reparented->ns == NULL || reparented->ns->prefix == NULL) {
    xmlNsPtr ns;
    xmlChar *prefix = NULL;
    /*
     * xmlSplitQName2() hands back newly allocated copies of the local name
     * and the prefix. They were previously never released, leaking on every
     * call; every exit from this block now frees them.
     */
    xmlChar *name = xmlSplitQName2(reparented->name, &prefix);

    if (reparented->type == XML_ATTRIBUTE_NODE) {
      /* Unprefixed attributes and xmlns declarations are left alone. */
      if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) {
        if (name) xmlFree(name);
        if (prefix) xmlFree(prefix);
        return;
      }
    }

    ns = xmlSearchNs(reparented->doc, reparented, prefix);
    if (ns == NULL && reparented->parent) {
      ns = xmlSearchNs(reparented->doc, reparented->parent, prefix);
    }

    if (ns != NULL) {
      /* xmlNodeSetName() stores its own copy, so name can be freed below. */
      xmlNodeSetName(reparented, name);
      xmlSetNs(reparented, ns);
    }

    if (name) xmlFree(name);
    if (prefix) xmlFree(prefix);
  }

  /* Avoid segv when relinking against unlinked nodes. */
  if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) return;

  /* Make sure that our reparented node has the correct namespaces */
  if (!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent)
    xmlSetNs(reparented, reparented->parent->ns);

  /* Search our parents for an existing definition */
  if (reparented->nsDef) {
    xmlNsPtr curr = reparented->nsDef;
    xmlNsPtr prev = NULL;

    while (curr) {
      xmlNsPtr existing = xmlSearchNsByHref(
                            reparented->doc,
                            reparented->parent,
                            curr->href
                          );
      /* If we find the namespace is already declared, remove it from this
       * definition list. */
      if (existing && existing != curr) {
        if (prev) {
          prev->next = curr->next;
        } else {
          reparented->nsDef = curr->next;
        }
        nokogiri_root_nsdef(curr, reparented->doc);
      } else {
        prev = curr;
      }
      curr = curr->next;
    }
  }

  /* Only walk all children if there actually is a namespace we need to */
  /* reparent. */
  if (NULL == reparented->ns) return;

  /* When a node gets reparented, walk its children to make sure that */
  /* their namespaces are reparented as well. */
  child = reparented->children;
  while (NULL != child) {
    relink_namespace(child);
    child = child->next;
  }

  if (reparented->type == XML_ELEMENT_NODE) {
    /* NOTE(review): the attribute list is reached through an xmlElementPtr
     * cast, exactly as before - confirm this is meant to read the element's
     * attribute nodes. */
    child = (xmlNodePtr)((xmlElementPtr)reparented)->attributes;
    while (NULL != child) {
      relink_namespace(child);
      child = child->next;
    }
  }
}
/* :nodoc: */
static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
{
  xmlNodePtr result = xmlReplaceNode(pivot, new_node);

  /* return semantics for reparent_node_with: hand back the node now in the tree */
  if (result == pivot) {
    result = new_node;
  }

  /* Only text nodes need the merge workaround below. */
  if (result == NULL || result->type != XML_TEXT_NODE) {
    return result;
  }

  /* work around libxml2 issue: https://bugzilla.gnome.org/show_bug.cgi?id=615612 */
  if (result->prev && result->prev->type == XML_TEXT_NODE) {
    result = xmlTextMerge(result->prev, result);
  }
  if (result->next && result->next->type == XML_TEXT_NODE) {
    result = xmlTextMerge(result, result->next);
  }

  return result;
}
/* :nodoc: */
/*
 * Shared implementation for the node-moving operations: attach reparentee_obj
 * relative to pivot_obj using the libxml2-style function prf (called as
 * prf(pivot, reparentee)).
 *
 * Raises ArgumentError when reparentee_obj is not a Node, is a Document, or
 * wraps a document node; raises RuntimeError when copying or attaching fails.
 * On success reparentee_obj is re-pointed at the node that ended up in the
 * tree, and the Ruby wrapper for that node is returned after decorate! has
 * been called on it.
 */
static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
{
VALUE reparented_obj ;
xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text ;
if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument))
rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
Data_Get_Struct(pivot_obj, xmlNode, pivot);
if(XML_DOCUMENT_NODE == reparentee->type || XML_HTML_DOCUMENT_NODE == reparentee->type)
rb_raise(rb_eArgError, "cannot reparent a document node");
/* detach the node from wherever it currently lives before moving it */
xmlUnlinkNode(reparentee);
if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
/*
 * if the reparentee is a text node, there's a very good chance it will be
 * merged with an adjacent text node after being reparented, and in that case
 * libxml will free the underlying C struct.
 *
 * since we clearly have a ruby object which references the underlying
 * memory, we can't let the C struct get freed. let's pickle the original
 * reparentee by rooting it; and then we'll reparent a duplicate of the
 * node that we don't care about preserving.
 *
 * alternatively, if the reparentee is from a different document than the
 * pivot node, libxml2 is going to get confused about which document's
 * "dictionary" the node's strings belong to (this is an otherwise
 * uninteresting libxml2 implementation detail). as a result, we cannot
 * reparent the actual reparentee, so we reparent a duplicate.
 */
nokogiri_root_node(reparentee);
/* from here on, reparentee refers to the deep copy made in pivot's document */
if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
}
}
/* this guard is skipped when prf is xmlAddPrevSibling or xmlAddNextSibling */
if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
&& reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
/*
 * libxml merges text nodes in a right-to-left fashion, meaning that if
 * there are two text nodes who would be adjacent, the right (or following,
 * or next) node will be merged into the left (or preceding, or previous)
 * node.
 *
 * and by "merged" I mean the string contents will be concatenated onto the
 * left node's contents, and then the node will be freed.
 *
 * which means that if we have a ruby object wrapped around the right node,
 * its memory would be freed out from under it.
 *
 * so, we detect this edge case and unlink-and-root the text node before it gets
 * merged. then we dup the node and insert that duplicate back into the
 * document where the real node was.
 *
 * yes, this is totally lame.
 */
next_text = pivot->next ;
new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
xmlUnlinkNode(next_text);
nokogiri_root_node(next_text);
xmlAddNextSibling(pivot, new_next_text);
}
/* perform the actual move; a NULL result is reported as a RuntimeError */
if(!(reparented = (*prf)(pivot, reparentee))) {
rb_raise(rb_eRuntimeError, "Could not reparent node");
}
/*
 * make sure the ruby object is pointed at the just-reparented node, which
 * might be a duplicate (see above) or might be the result of merging
 * adjacent text nodes.
 */
DATA_PTR(reparentee_obj) = reparented ;
relink_namespace(reparented);
reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);
rb_funcall(reparented_obj, decorate_bang, 0);
return reparented_obj ;
}
/*
* call-seq:
* document
*
* Get the document for this Node
*/
static VALUE document(VALUE self)
{
  xmlNodePtr wrapped;
  xmlDocPtr owner;

  Data_Get_Struct(self, xmlNode, wrapped);
  owner = wrapped->doc;

  /* The Ruby document object is looked up from the C document. */
  return DOC_RUBY_OBJECT(owner);
}
/*
* call-seq:
* pointer_id
*
* Get the internal pointer number
*/
static VALUE pointer_id(VALUE self)
{
  xmlNodePtr node;

  Data_Get_Struct(self, xmlNode, node);

  /*
   * Convert with LONG2NUM: the value is already cast to long, and on Rubies
   * where INT2NUM takes an int the previous INT2NUM() call truncated the
   * pointer on LP64 platforms, so distinct nodes could report the same id.
   */
  return LONG2NUM((long)(node));
}
/*
* call-seq:
* encode_special_chars(string)
*
* Encode any special characters in +string+
*/
static VALUE encode_special_chars(VALUE self, VALUE string)
{
  xmlNodePtr node;
  xmlChar *escaped;
  VALUE result;

  Data_Get_Struct(self, xmlNode, node);

  /* libxml2 returns a newly allocated, escaped copy of the input. */
  escaped = xmlEncodeSpecialChars(node->doc, (const xmlChar *)StringValuePtr(string));

  /* Copy into a Ruby string before releasing the C buffer. */
  result = NOKOGIRI_STR_NEW2(escaped);
  xmlFree(escaped);

  return result;
}
/*
* call-seq:
* create_internal_subset(name, external_id, system_id)
*
* Create the internal subset of a document.
*
* doc.create_internal_subset("chapter", "-//OASIS//DTD DocBook XML//EN", "chapter.dtd")
* # => <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML//EN" "chapter.dtd">
*
* doc.create_internal_subset("chapter", nil, "chapter.dtd")
* # => <!DOCTYPE chapter SYSTEM "chapter.dtd">
*/
static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
{
  xmlNodePtr node;
  xmlDocPtr doc;
  xmlDtdPtr dtd;
  const xmlChar *c_name = NULL;
  const xmlChar *c_external_id = NULL;
  const xmlChar *c_system_id = NULL;

  Data_Get_Struct(self, xmlNode, node);
  doc = node->doc;

  /* A document can carry only one internal subset. */
  if (xmlGetIntSubset(doc)) {
    rb_raise(rb_eRuntimeError, "Document already has an internal subset");
  }

  /* nil arguments are passed to libxml2 as NULL. */
  if (!NIL_P(name)) {
    c_name = (const xmlChar *)StringValuePtr(name);
  }
  if (!NIL_P(external_id)) {
    c_external_id = (const xmlChar *)StringValuePtr(external_id);
  }
  if (!NIL_P(system_id)) {
    c_system_id = (const xmlChar *)StringValuePtr(system_id);
  }

  dtd = xmlCreateIntSubset(doc, c_name, c_external_id, c_system_id);
  if (dtd == NULL) {
    return Qnil;
  }

  return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
}
/*
* call-seq:
* create_external_subset(name, external_id, system_id)
*
* Create an external subset
*/
static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
{
  xmlNodePtr node;
  xmlDocPtr doc;
  xmlDtdPtr dtd;
  const xmlChar *c_name = NULL;
  const xmlChar *c_external_id = NULL;
  const xmlChar *c_system_id = NULL;

  Data_Get_Struct(self, xmlNode, node);
  doc = node->doc;

  /* Refuse to create a second external subset. */
  if (doc->extSubset) {
    rb_raise(rb_eRuntimeError, "Document already has an external subset");
  }

  /* nil arguments are passed to libxml2 as NULL. */
  if (!NIL_P(name)) {
    c_name = (const xmlChar *)StringValuePtr(name);
  }
  if (!NIL_P(external_id)) {
    c_external_id = (const xmlChar *)StringValuePtr(external_id);
  }
  if (!NIL_P(system_id)) {
    c_system_id = (const xmlChar *)StringValuePtr(system_id);
  }

  dtd = xmlNewDtd(doc, c_name, c_external_id, c_system_id);
  if (dtd == NULL) {
    return Qnil;
  }

  return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
}
/*
* call-seq:
* external_subset
*
* Get the external subset
*/
static VALUE external_subset(VALUE self)
{
  xmlNodePtr node;
  xmlDtdPtr subset;

  Data_Get_Struct(self, xmlNode, node);

  /* nil when the node has no document or the document has no external subset. */
  if (node->doc == NULL) {
    return Qnil;
  }
  subset = node->doc->extSubset;
  if (subset == NULL) {
    return Qnil;
  }

  return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)subset);
}
/*
* call-seq:
* internal_subset
*
* Get the internal subset
*/
static VALUE internal_subset(VALUE self)
{
  xmlNodePtr node;
  xmlDtdPtr subset;

  Data_Get_Struct(self, xmlNode, node);

  /* nil when the node has no document or the document has no internal subset. */
  if (node->doc == NULL) {
    return Qnil;
  }
  subset = xmlGetIntSubset(node->doc);
  if (subset == NULL) {
    return Qnil;
  }

  return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)subset);
}
/*
 * call-seq:
 *  dup
 *
 * Copy this node. An optional depth may be passed in, but it defaults
 * to a deep copy. 0 is a shallow copy, 1 is a deep copy. Returns nil when
 * libxml2 cannot produce the copy.
 */
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
{
  VALUE level;
  xmlNodePtr original, copy;
  int given;

  given = rb_scan_args(argc, argv, "01", &level);
  if (given == 0) {
    /* no depth supplied: default to a deep copy */
    level = INT2NUM((long)1);
  }

  Data_Get_Struct(self, xmlNode, original);

  copy = xmlDocCopyNode(original, original->doc, (int)NUM2INT(level));
  if (!copy) {
    return Qnil;
  }

  nokogiri_root_node(copy);

  /* the copy is wrapped with the same Ruby class as the receiver */
  return Nokogiri_wrap_xml_node(rb_obj_class(self), copy);
}
/*
 * call-seq:
 *  unlink
 *
 * Unlink this node from its current context and return the node itself.
 */
static VALUE unlink_node(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  /* detach from the tree first, then register the detached node */
  xmlUnlinkNode(c_node);
  nokogiri_root_node(c_node);

  return self;
}
/*
 * call-seq:
 *  blank?
 *
 * Is this node blank? True only when xmlIsBlankNode reports 1.
 */
static VALUE blank_eh(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  if (xmlIsBlankNode(c_node) == 1) {
    return Qtrue;
  }
  return Qfalse;
}
/*
 * call-seq:
 *  next_sibling
 *
 * Returns the next sibling node, or nil when there is none.
 */
static VALUE next_sibling(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  return c_node->next ? Nokogiri_wrap_xml_node(Qnil, c_node->next) : Qnil;
}
/*
 * call-seq:
 *  previous_sibling
 *
 * Returns the previous sibling node, or nil when there is none.
 */
static VALUE previous_sibling(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  return c_node->prev ? Nokogiri_wrap_xml_node(Qnil, c_node->prev) : Qnil;
}
/*
 * call-seq:
 *  next_element
 *
 * Returns the next Nokogiri::XML::Element type sibling node, or nil when
 * there is none.
 */
static VALUE next_element(VALUE self)
{
  xmlNodePtr c_node, c_next;

  Data_Get_Struct(self, xmlNode, c_node);

  c_next = xmlNextElementSibling(c_node);

  return c_next ? Nokogiri_wrap_xml_node(Qnil, c_next) : Qnil;
}
/*
 * call-seq:
 *  previous_element
 *
 * Returns the previous Nokogiri::XML::Element type sibling node, or nil
 * when there is none.
 */
static VALUE previous_element(VALUE self)
{
  xmlNodePtr c_node, cursor;

  Data_Get_Struct(self, xmlNode, c_node);

  /*
   * Walk the sibling chain by hand: xmlPreviousElementSibling is not used
   * here because it's buggy pre-2.7.7.
   */
  for (cursor = c_node->prev; cursor != NULL; cursor = cursor->prev) {
    if (cursor->type == XML_ELEMENT_NODE) {
      return Nokogiri_wrap_xml_node(Qnil, cursor);
    }
  }

  return Qnil;
}
/* :nodoc: */
static VALUE replace(VALUE self, VALUE new_node)
{
  VALUE result;
  xmlNodePtr replaced;

  /* swap new_node into this node's place via the shared reparenting helper */
  result = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);

  /* then register the replaced node with nokogiri_root_node */
  Data_Get_Struct(self, xmlNode, replaced);
  nokogiri_root_node(replaced);

  return result;
}
/*
 * call-seq:
 *  children
 *
 * Get the list of children for this node as a NodeSet. The set is empty
 * when the node has no children.
 */
static VALUE children(VALUE self)
{
  xmlNodePtr c_node, first, cursor;
  xmlNodeSetPtr c_set;
  VALUE rb_document;

  Data_Get_Struct(self, xmlNode, c_node);

  /* the set is seeded with the first child (possibly NULL) */
  first = c_node->children;
  c_set = xmlXPathNodeSetCreate(first);

  rb_document = DOC_RUBY_OBJECT(c_node->doc);

  if (first) {
    /* append the remaining siblings in document order */
    for (cursor = first->next; cursor != NULL; cursor = cursor->next) {
      xmlXPathNodeSetAddUnique(c_set, cursor);
    }
  }

  return Nokogiri_wrap_xml_node_set(c_set, rb_document);
}
/*
 * call-seq:
 *  element_children
 *
 * Get the list of children for this node as a NodeSet. All nodes will be
 * element nodes.
 *
 * Example:
 *
 *   @doc.root.element_children.all? { |x| x.element? } # => true
 */
static VALUE element_children(VALUE self)
{
  xmlNodePtr c_node, first, cursor;
  xmlNodeSetPtr c_set;
  VALUE rb_document;

  Data_Get_Struct(self, xmlNode, c_node);

  /* the set is seeded with the first element child (possibly NULL) */
  first = xmlFirstElementChild(c_node);
  c_set = xmlXPathNodeSetCreate(first);

  rb_document = DOC_RUBY_OBJECT(c_node->doc);

  if (first) {
    /* append every following element sibling */
    for (cursor = xmlNextElementSibling(first);
         cursor != NULL;
         cursor = xmlNextElementSibling(cursor)) {
      xmlXPathNodeSetAddUnique(c_set, cursor);
    }
  }

  return Nokogiri_wrap_xml_node_set(c_set, rb_document);
}
/*
 * call-seq:
 *  child
 *
 * Returns the first child node, or nil when this node has no children.
 */
static VALUE child(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  return c_node->children ? Nokogiri_wrap_xml_node(Qnil, c_node->children) : Qnil;
}
/*
 * call-seq:
 *  first_element_child
 *
 * Returns the first child node of this node that is an element, or nil
 * when there is none.
 *
 * Example:
 *
 *   @doc.root.first_element_child.element? # => true
 */
static VALUE first_element_child(VALUE self)
{
  xmlNodePtr c_node, c_first;

  Data_Get_Struct(self, xmlNode, c_node);

  c_first = xmlFirstElementChild(c_node);

  return c_first ? Nokogiri_wrap_xml_node(Qnil, c_first) : Qnil;
}
/*
 * call-seq:
 *  last_element_child
 *
 * Returns the last child node of this node that is an element, or nil
 * when there is none.
 *
 * Example:
 *
 *   @doc.root.last_element_child.element? # => true
 */
static VALUE last_element_child(VALUE self)
{
  xmlNodePtr c_node, c_last;

  Data_Get_Struct(self, xmlNode, c_node);

  c_last = xmlLastElementChild(c_node);

  return c_last ? Nokogiri_wrap_xml_node(Qnil, c_last) : Qnil;
}
/*
 * call-seq:
 *  key?(attribute)
 *
 * Returns true if +attribute+ is set
 */
static VALUE key_eh(VALUE self, VALUE attribute)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  return xmlHasProp(c_node, (xmlChar *)StringValuePtr(attribute)) ? Qtrue : Qfalse;
}
/*
 * call-seq:
 *  namespaced_key?(attribute, namespace)
 *
 * Returns true if +attribute+ is set with +namespace+. A nil +namespace+
 * is passed to libxml2 as NULL.
 */
static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
{
  xmlNodePtr c_node;
  xmlAttrPtr c_attr;

  Data_Get_Struct(self, xmlNode, c_node);

  c_attr = xmlHasNsProp(
             c_node,
             (xmlChar *)StringValuePtr(attribute),
             NIL_P(namespace) ? NULL : (xmlChar *)StringValuePtr(namespace)
           );

  return c_attr ? Qtrue : Qfalse;
}
/*
 * call-seq:
 *  []=(property, value)
 *
 * Set the +property+ to +value+. Returns +value+, or nil (without changing
 * anything) when this node is not an element.
 */
static VALUE set(VALUE self, VALUE property, VALUE value)
{
  xmlNodePtr node, cur, next;
  xmlAttrPtr prop;

  Data_Get_Struct(self, xmlNode, node);

  /* If a matching attribute node already exists, then xmlSetProp will destroy
   * the existing node's children. However, if Nokogiri has a node object
   * pointing to one of those children, we are left with a broken reference.
   *
   * We can avoid this by unlinking these nodes first.
   */
  if (node->type != XML_ELEMENT_NODE)
    return(Qnil);

  prop = xmlHasProp(node, (xmlChar *)StringValuePtr(property));
  if (prop && prop->children) {
    for (cur = prop->children; cur; cur = next) {
      /* xmlUnlinkNode clears cur->next, so remember the successor before
       * unlinking; otherwise the walk would stop at the first wrapped child
       * and later wrapped children would still be freed by xmlSetProp. */
      next = cur->next;
      if (cur->_private) {
        nokogiri_root_node(cur);
        xmlUnlinkNode(cur);
      }
    }
  }

  xmlSetProp(node, (xmlChar *)StringValuePtr(property),
             (xmlChar *)StringValuePtr(value));

  return value;
}
/*
 * call-seq:
 *  get(attribute)
 *
 * Get the value for +attribute+. A name of the form "prefix:name" is
 * resolved through the namespace bound to +prefix+ when one is in scope;
 * otherwise the full name is looked up as given. Returns nil when
 * +attribute+ is nil or no value is found.
 */
static VALUE get(VALUE self, VALUE rattribute)
{
  xmlNodePtr c_node;
  xmlChar *found = NULL;
  char *name_copy;
  char *separator;
  VALUE result;

  if (NIL_P(rattribute)) {
    return Qnil;
  }

  Data_Get_Struct(self, xmlNode, c_node);

  /* work on a private copy so the name can be split in place */
  name_copy = strdup(StringValuePtr(rattribute));
  separator = strchr(name_copy, ':');

  if (separator == NULL) {
    found = xmlGetNoNsProp(c_node, (xmlChar *)name_copy);
  } else {
    xmlNsPtr ns;

    /* cut the copy into "prefix\0name" */
    *separator = 0;
    ns = xmlSearchNs(c_node->doc, c_node, (const xmlChar *)name_copy);

    if (ns) {
      found = xmlGetNsProp(c_node, (xmlChar *)(separator + 1), ns->href);
    } else {
      /* unknown prefix: fall back to the untouched original name */
      found = xmlGetProp(c_node, (xmlChar *)StringValuePtr(rattribute));
    }
  }

  free(name_copy);

  if (found == NULL) {
    return Qnil;
  }

  result = NOKOGIRI_STR_NEW2(found);
  xmlFree(found);

  return result;
}
/*
 * call-seq:
 *  set_namespace(namespace)
 *
 * Set the namespace to +namespace+. Passing nil hands NULL to xmlSetNs.
 */
static VALUE set_namespace(VALUE self, VALUE namespace)
{
  xmlNodePtr c_node;
  xmlNsPtr c_ns = NULL;

  Data_Get_Struct(self, xmlNode, c_node);

  if (!NIL_P(namespace)) {
    Data_Get_Struct(namespace, xmlNs, c_ns);
  }

  xmlSetNs(c_node, c_ns);

  return self;
}
/*
 * call-seq:
 *  attribute(name)
 *
 * Get the attribute node with +name+, or nil when there is none.
 */
static VALUE attr(VALUE self, VALUE name)
{
  xmlNodePtr c_node;
  xmlAttrPtr c_attr;

  Data_Get_Struct(self, xmlNode, c_node);

  c_attr = xmlHasProp(c_node, (xmlChar *)StringValuePtr(name));

  return c_attr ? Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)c_attr) : Qnil;
}
/*
 * call-seq:
 *  attribute_with_ns(name, namespace)
 *
 * Get the attribute node with +name+ and +namespace+, or nil when there is
 * none. A nil +namespace+ is passed to libxml2 as NULL.
 */
static VALUE attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
{
  xmlNodePtr c_node;
  xmlAttrPtr c_attr;

  Data_Get_Struct(self, xmlNode, c_node);

  c_attr = xmlHasNsProp(
             c_node,
             (xmlChar *)StringValuePtr(name),
             NIL_P(namespace) ? NULL : (xmlChar *)StringValuePtr(namespace)
           );

  return c_attr ? Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)c_attr) : Qnil;
}
/*
 * call-seq:
 *  attribute_nodes()
 *
 * returns a list containing the Node attributes.
 */
static VALUE attribute_nodes(VALUE self)
{
  /* this code in the mode of xmlHasProp() */
  xmlNodePtr c_node;
  VALUE rb_attributes;

  Data_Get_Struct(self, xmlNode, c_node);

  /* Nokogiri_xml_node_properties fills the array it is given */
  rb_attributes = rb_ary_new();
  Nokogiri_xml_node_properties(c_node, rb_attributes);

  return rb_attributes;
}
/*
 * call-seq:
 *  namespace()
 *
 * returns the namespace this node is in (the node's +ns+ pointer) as a
 * Namespace object, or nil when the node has no namespace.
 */
static VALUE namespace(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  if (c_node->ns == NULL) {
    return Qnil;
  }

  return Nokogiri_wrap_xml_namespace(c_node->doc, c_node->ns);
}
/*
 * call-seq:
 *  namespace_definitions()
 *
 * returns namespaces defined on self element directly, as an array of
 * Namespace objects. Includes both a default namespace (as in "xmlns="),
 * and prefixed namespaces (as in "xmlns:prefix=").
 */
static VALUE namespace_definitions(VALUE self)
{
  /* this code in the mode of xmlHasProp() */
  xmlNodePtr c_node;
  xmlNsPtr c_ns;
  VALUE rb_list;

  Data_Get_Struct(self, xmlNode, c_node);

  rb_list = rb_ary_new();

  /* nsDef is a singly linked list of the declarations on this node */
  for (c_ns = c_node->nsDef; c_ns != NULL; c_ns = c_ns->next) {
    rb_ary_push(rb_list, Nokogiri_wrap_xml_namespace(c_node->doc, c_ns));
  }

  return rb_list;
}
/*
 * call-seq:
 *  namespace_scopes()
 *
 * returns namespaces in scope for self -- those defined on self element
 * directly or any ancestor node -- as an array of Namespace objects. Default
 * namespaces ("xmlns=" style) for self are included in this array; Default
 * namespaces for ancestors, however, are not. See also #namespaces
 */
static VALUE namespace_scopes(VALUE self)
{
  xmlNodePtr c_node;
  xmlNsPtr *c_namespaces;
  xmlNsPtr *cursor;
  VALUE rb_list;

  Data_Get_Struct(self, xmlNode, c_node);

  rb_list = rb_ary_new();

  c_namespaces = xmlGetNsList(c_node->doc, c_node);
  if (c_namespaces == NULL) {
    return rb_list;
  }

  /* the array returned by xmlGetNsList is NULL-terminated */
  for (cursor = c_namespaces; *cursor != NULL; ++cursor) {
    rb_ary_push(rb_list, Nokogiri_wrap_xml_namespace(c_node->doc, *cursor));
  }

  /* only the array itself is freed here, not the namespaces it points to */
  xmlFree(c_namespaces);

  return rb_list;
}
/*
 * call-seq:
 *  node_type
 *
 * Get the type for this Node, as the integer value of the libxml2 node type.
 */
static VALUE node_type(VALUE self)
{
  xmlNodePtr c_node;
  long c_type;

  Data_Get_Struct(self, xmlNode, c_node);
  c_type = (long)c_node->type;

  return INT2NUM(c_type);
}
/*
 * call-seq:
 *  content=
 *
 * Set the content for this Node. Existing children are unlinked one by one
 * before the new content is stored. Returns +content+.
 */
static VALUE native_content(VALUE self, VALUE content)
{
  xmlNodePtr c_node, current, following;

  Data_Get_Struct(self, xmlNode, c_node);

  for (current = c_node->children; current != NULL; current = following) {
    /* grab the successor before unlinking the current child */
    following = current->next;
    xmlUnlinkNode(current);
    nokogiri_root_node(current);
  }

  xmlNodeSetContent(c_node, (xmlChar *)StringValuePtr(content));

  return content;
}
/*
 * call-seq:
 *  content
 *
 * Returns the content for this Node, or nil when libxml2 reports none.
 */
static VALUE get_content(VALUE self)
{
  xmlNodePtr c_node;
  xmlChar *c_content;
  VALUE rb_content;

  Data_Get_Struct(self, xmlNode, c_node);

  c_content = xmlNodeGetContent(c_node);
  if (c_content == NULL) {
    return Qnil;
  }

  /* copy into a Ruby string, then release the libxml2 buffer */
  rb_content = NOKOGIRI_STR_NEW2(c_content);
  xmlFree(c_content);

  return rb_content;
}
/* :nodoc: */
static VALUE add_child(VALUE self, VALUE new_child)
{
/* Delegates to reparent_node_with, passing xmlAddChild as the libxml2
 * operation to apply; the helper's result is returned unchanged. */
return reparent_node_with(self, new_child, xmlAddChild);
}
/*
 * call-seq:
 *  parent
 *
 * Get the parent Node for this Node, or nil when it has no parent.
 */
static VALUE get_parent(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  return c_node->parent ? Nokogiri_wrap_xml_node(Qnil, c_node->parent) : Qnil;
}
/*
 * call-seq:
 *  name=(new_name)
 *
 * Set the name for this Node. Returns +new_name+.
 */
static VALUE set_name(VALUE self, VALUE new_name)
{
  xmlNodePtr c_node;
  const xmlChar *c_name;

  Data_Get_Struct(self, xmlNode, c_node);

  c_name = (xmlChar *)StringValuePtr(new_name);
  xmlNodeSetName(c_node, c_name);

  return new_name;
}
/*
 * call-seq:
 *  name
 *
 * Returns the name for this Node, or nil when the node has no name.
 */
static VALUE get_name(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  return c_node->name ? NOKOGIRI_STR_NEW2(c_node->name) : Qnil;
}
/*
 * call-seq:
 *  path
 *
 * Returns the path associated with this Node, or nil when libxml2 cannot
 * build a path for it.
 */
static VALUE path(VALUE self)
{
  xmlNodePtr node;
  xmlChar *path ;
  VALUE rval;

  Data_Get_Struct(self, xmlNode, node);

  path = xmlGetNodePath(node);

  /* xmlGetNodePath returns NULL on failure; building a Ruby string from a
   * NULL pointer would crash, so report nil like get_name/get_content do. */
  if (!path) return Qnil;

  rval = NOKOGIRI_STR_NEW2(path);
  xmlFree(path);

  return rval ;
}
/* :nodoc: */
static VALUE add_next_sibling(VALUE self, VALUE new_sibling)
{
/* Delegates to reparent_node_with, passing xmlAddNextSibling as the libxml2
 * operation to apply; the helper's result is returned unchanged. */
return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
}
/* :nodoc: */
static VALUE add_previous_sibling(VALUE self, VALUE new_sibling)
{
/* Delegates to reparent_node_with, passing xmlAddPrevSibling as the libxml2
 * operation to apply; the helper's result is returned unchanged. */
return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
}
/*
 * call-seq:
 *  native_write_to(io, encoding, indent_string, options)
 *
 * Write this Node to +io+ with +encoding+ and +options+, using
 * +indent_string+ as the indentation unit. A false/nil +encoding+ is passed
 * to libxml2 as NULL. Returns +io+.
 */
static VALUE native_write_to(
    VALUE self,
    VALUE io,
    VALUE encoding,
    VALUE indent_string,
    VALUE options
) {
  xmlNodePtr node;
  const char * before_indent;
  const char * c_indent;
  const char * c_encoding;
  int c_options;
  xmlSaveCtxtPtr savectx;

  Data_Get_Struct(self, xmlNode, node);

  /* Convert every Ruby argument BEFORE touching libxml2's global indent
   * string: these conversions raise on a wrong argument type, and raising
   * after the global was swapped would leave it pointing at a Ruby-owned
   * buffer with nothing to restore it. */
  c_indent = StringValuePtr(indent_string);
  c_encoding = RTEST(encoding) ? StringValuePtr(encoding) : NULL;
  c_options = (int)NUM2INT(options);

  xmlIndentTreeOutput = 1;

  /* temporarily swap in the caller's indent string */
  before_indent = xmlTreeIndentString;
  xmlTreeIndentString = c_indent;

  savectx = xmlSaveToIO(
      (xmlOutputWriteCallback)io_write_callback,
      (xmlOutputCloseCallback)io_close_callback,
      (void *)io,
      c_encoding,
      c_options
  );

  xmlSaveTree(savectx, node);
  xmlSaveClose(savectx);

  xmlTreeIndentString = before_indent;
  return io;
}
/*
 * call-seq:
 *  line
 *
 * Returns the line for this Node, as reported by xmlGetLineNo.
 */
static VALUE line(VALUE self)
{
  xmlNodePtr c_node;

  Data_Get_Struct(self, xmlNode, c_node);

  return INT2NUM(xmlGetLineNo(c_node));
}
/*
 * call-seq:
 *  add_namespace_definition(prefix, href)
 *
 * Adds a namespace definition with +prefix+ using +href+ value. The result is
 * as if parsed XML for this node had included an attribute
 * 'xmlns:prefix=value'.  A default namespace for this node ("xmlns=") can be
 * added by passing 'nil' for prefix. Namespaces added this way will not
 * show up in #attributes, but they will be included as an xmlns attribute
 * when the node is serialized to XML.
 *
 * A namespace already in scope for +prefix+ is reused instead of creating a
 * new one. Returns the Namespace object, or nil when none could be created.
 */
static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
{
  xmlNodePtr c_node, owner;
  xmlNsPtr c_ns;

  Data_Get_Struct(self, xmlNode, c_node);
  owner = c_node;

  c_ns = xmlSearchNs(
           c_node->doc,
           c_node,
           (const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
         );

  if (c_ns == NULL) {
    /* for a non-element node the definition is created on its parent */
    if (c_node->type != XML_ELEMENT_NODE) {
      owner = c_node->parent;
    }

    c_ns = xmlNewNs(
             owner,
             (const xmlChar *)StringValuePtr(href),
             (const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
           );

    if (c_ns == NULL) {
      return Qnil;
    }
  }

  /* apply the namespace to the node itself for a default namespace, or when
   * the definition was placed on the parent */
  if (NIL_P(prefix) || c_node != owner) {
    xmlSetNs(c_node, c_ns);
  }

  return Nokogiri_wrap_xml_namespace(c_node->doc, c_ns);
}
/*
 * call-seq:
 *  new(name, document)
 *
 * Create a new node with +name+ sharing GC lifecycle with +document+.
 * Any further arguments are passed on to #initialize; the new node is
 * yielded when a block is given.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE name, document, rest;
  VALUE wrap_class;
  VALUE rb_node;
  xmlDocPtr c_doc;
  xmlNodePtr c_node;

  rb_scan_args(argc, argv, "2*", &name, &document, &rest);

  Data_Get_Struct(document, xmlDoc, c_doc);

  c_node = xmlNewNode(NULL, (xmlChar *)StringValuePtr(name));
  c_node->doc = c_doc->doc;
  nokogiri_root_node(c_node);

  /* for plain Node.new, pass a false klass so the wrapper chooses the Ruby
   * class from the node type; subclasses are wrapped as themselves */
  wrap_class = (klass == cNokogiriXmlNode) ? (VALUE)NULL : klass;
  rb_node = Nokogiri_wrap_xml_node(wrap_class, c_node);

  rb_obj_call_init(rb_node, argc, argv);

  if (rb_block_given_p()) {
    rb_yield(rb_node);
  }

  return rb_node;
}
/*
 * call-seq:
 *  dump_html
 *
 * Returns the Node as html.
 */
static VALUE dump_html(VALUE self)
{
  xmlNodePtr c_node;
  xmlBufferPtr c_buffer;
  VALUE rb_html;

  Data_Get_Struct(self, xmlNode, c_node);

  /* serialize into a scratch libxml2 buffer, copy it out, then free it */
  c_buffer = xmlBufferCreate();
  htmlNodeDump(c_buffer, c_node->doc, c_node);
  rb_html = NOKOGIRI_STR_NEW2(c_buffer->content);
  xmlBufferFree(c_buffer);

  return rb_html;
}
/*
 * call-seq:
 *  compare(other)
 *
 * Compare this Node to +other+ with respect to their Document
 */
static VALUE compare(VALUE self, VALUE _other)
{
  xmlNodePtr c_self, c_other;
  long ordering;

  Data_Get_Struct(self, xmlNode, c_self);
  Data_Get_Struct(_other, xmlNode, c_other);

  /* note the argument order: +other+ goes first */
  ordering = (long)xmlXPathCmpNodes(c_other, c_self);

  return INT2NUM(ordering);
}
/*
 * call-seq:
 *   process_xincludes(options)
 *
 * Loads and substitutes all xinclude elements below the node. The
 * parser context will be initialized with +options+. Raises when libxml2
 * reports a failure; returns self otherwise.
 */
static VALUE process_xincludes(VALUE self, VALUE options)
{
  int status;
  xmlNodePtr c_node;
  VALUE rb_errors = rb_ary_new();

  Data_Get_Struct(self, xmlNode, c_node);

  /* collect structured errors into rb_errors while processing runs */
  xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
  status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(options));
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (status < 0) {
    xmlErrorPtr last_error = xmlGetLastError();

    if (!last_error) {
      rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
    }
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));
  }

  return self;
}
/*
 * Parse the string +_str+ with xmlParseInNodeContext, using this node as the
 * context node and +_options+ as the parser options. Structured errors are
 * pushed onto the document's @errors object while parsing. The parsed nodes
 * are returned as a node set wrapped for the owning document. Raises
 * RuntimeError when the parser reports XML_ERR_INTERNAL_ERROR or
 * XML_ERR_NO_MEMORY; other parser errors do not raise here.
 */
static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
{
xmlNodePtr node, list = 0, tmp, child_iter, node_children, doc_children;
xmlNodeSetPtr set;
xmlParserErrors error;
VALUE doc, err;
int doc_is_empty;
Data_Get_Struct(self, xmlNode, node);
doc = DOC_RUBY_OBJECT(node->doc);
err = rb_iv_get(doc, "@errors");
/* snapshot the child pointers so they can be restored after a failed parse */
doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
node_children = node->children;
doc_children = node->doc->children;
xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
/* Twiddle global variable because of a bug in libxml2.
* http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
*/
#ifndef HTML_PARSE_NOIMPLIED
htmlHandleOmittedElem(0);
#endif
/* This function adds a fake node to the child of +node+. If the parser
* does not exit cleanly with XML_ERR_OK, the list is freed. This can
* leave the child pointers in a bad state if they were originally empty.
*
* http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
* */
error = xmlParseInNodeContext(node, StringValuePtr(_str),
(int)RSTRING_LEN(_str),
(int)NUM2INT(_options), &list);
/* xmlParseInNodeContext should not mutate the original document or node,
* so reassigning these pointers should be OK. The reason we're reassigning
* is because if there were errors, it's possible for the child pointers
* to be manipulated. */
if (error != XML_ERR_OK) {
node->doc->children = doc_children;
node->children = node_children;
}
/* make sure parent/child pointers are coherent so an unlink will work
* properly (#331)
*/
child_iter = node->doc->children ;
while (child_iter) {
if (child_iter->parent != (xmlNodePtr)node->doc)
child_iter->parent = (xmlNodePtr)node->doc;
child_iter = child_iter->next;
}
#ifndef HTML_PARSE_NOIMPLIED
htmlHandleOmittedElem(1);
#endif
xmlSetStructuredErrorFunc(NULL, NULL);
/* Workaround for a libxml2 bug where a parsing error may leave a broken
* node reference in node->doc->children.
* This workaround is limited to when a parse error occurs, the document
* went from having no children to having children, and the context node is
* part of a document fragment.
* https://bugzilla.gnome.org/show_bug.cgi?id=668155
*/
if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
/* climb to the topmost ancestor of the context node */
child_iter = node;
while (child_iter->parent)
child_iter = child_iter->parent;
if (child_iter->type == XML_DOCUMENT_FRAG_NODE)
node->doc->children = NULL;
}
/* FIXME: This probably needs to handle more constants... */
switch (error) {
case XML_ERR_INTERNAL_ERROR:
case XML_ERR_NO_MEMORY:
rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
break;
default:
break;
}
/* move each parsed node out of the sibling list and into the result set */
set = xmlXPathNodeSetCreate(NULL);
while (list) {
tmp = list->next;
list->next = NULL;
xmlXPathNodeSetAddUnique(set, list);
nokogiri_root_node(list);
list = tmp;
}
return Nokogiri_wrap_xml_node_set(set, doc);
}
/*
 * Return the Ruby object that represents +node+.
 *
 * - Document nodes (XML or HTML) resolve to the document's Ruby object.
 * - A node that already carries a Ruby object in _private, and whose
 *   document has a Ruby object, returns that existing object.
 * - Otherwise a new Ruby object is created. When +klass+ is false/nil the
 *   class is chosen from the libxml2 node type, falling back to
 *   Nokogiri::XML::Node for types not listed.
 *
 * +node+ must not be NULL (asserted).
 */
VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
{
VALUE document = Qnil ;
VALUE node_cache = Qnil ;
VALUE rb_node = Qnil ;
nokogiriTuplePtr node_has_a_document;
xmlDocPtr doc;
void (*mark_method)(xmlNodePtr) = NULL ;
assert(node);
if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
return DOC_RUBY_OBJECT(node->doc);
/* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
/* see https://github.com/sparklemotion/nokogiri/issues/95 */
/* and https://github.com/sparklemotion/nokogiri/issues/439 */
doc = node->doc;
/* when node->doc is actually a fragment node, step through to its document */
if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
node_has_a_document = DOC_RUBY_OBJECT_TEST(doc);
/* reuse the Ruby object created by an earlier wrap of this node */
if(node->_private && node_has_a_document)
return (VALUE)node->_private;
if(!RTEST(klass)) {
switch(node->type)
{
case XML_ELEMENT_NODE:
klass = cNokogiriXmlElement;
break;
case XML_TEXT_NODE:
klass = cNokogiriXmlText;
break;
case XML_ATTRIBUTE_NODE:
klass = cNokogiriXmlAttr;
break;
case XML_ENTITY_REF_NODE:
klass = cNokogiriXmlEntityReference;
break;
case XML_COMMENT_NODE:
klass = cNokogiriXmlComment;
break;
case XML_DOCUMENT_FRAG_NODE:
klass = cNokogiriXmlDocumentFragment;
break;
case XML_PI_NODE:
klass = cNokogiriXmlProcessingInstruction;
break;
case XML_ENTITY_DECL:
klass = cNokogiriXmlEntityDecl;
break;
case XML_CDATA_SECTION_NODE:
klass = cNokogiriXmlCData;
break;
case XML_DTD_NODE:
klass = cNokogiriXmlDtd;
break;
case XML_ATTRIBUTE_DECL:
klass = cNokogiriXmlAttributeDecl;
break;
case XML_ELEMENT_DECL:
klass = cNokogiriXmlElementDecl;
break;
default:
klass = cNokogiriXmlNode;
}
}
/* a GC mark function is installed only when the document has a Ruby object */
mark_method = node_has_a_document ? mark : NULL ;
rb_node = Data_Wrap_Struct(klass, mark_method, debug_node_dealloc, node) ;
/* remember the wrapper on the C node so later wraps can return it */
node->_private = (void *)rb_node;
if (node_has_a_document) {
document = DOC_RUBY_OBJECT(doc);
node_cache = DOC_NODE_CACHE(doc);
/* record the wrapper in the document's node cache, then call the
 * document's "decorate" method with it */
rb_ary_push(node_cache, rb_node);
rb_funcall(document, decorate, 1, rb_node);
}
return rb_node ;
}
/*
 * Append a wrapped Ruby object for every attribute of +node+ to the Ruby
 * array +attr_list+, in the order of the node's property list.
 */
void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_list)
{
  xmlAttrPtr c_attr;

  for (c_attr = node->properties; c_attr != NULL; c_attr = c_attr->next) {
    rb_ary_push(attr_list, Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)c_attr));
  }
}
VALUE cNokogiriXmlNode ;
VALUE cNokogiriXmlElement ;
/*
 * Define Nokogiri::XML::Node and Nokogiri::XML::Element and bind the C
 * functions of this file to their Ruby method names. Also interns the
 * "decorate" and "decorate!" method ids.
 */
void init_xml_node()
{
VALUE nokogiri = rb_define_module("Nokogiri");
VALUE xml = rb_define_module_under(nokogiri, "XML");
VALUE klass = rb_define_class_under(xml, "Node", rb_cObject);
cNokogiriXmlNode = klass;
/* Element is a subclass of Node */
cNokogiriXmlElement = rb_define_class_under(xml, "Element", klass);
/* public API; an arity of -1 means the C function receives argc/argv */
rb_define_singleton_method(klass, "new", new, -1);
rb_define_method(klass, "add_namespace_definition", add_namespace_definition, 2);
rb_define_method(klass, "node_name", get_name, 0);
rb_define_method(klass, "document", document, 0);
rb_define_method(klass, "node_name=", set_name, 1);
rb_define_method(klass, "parent", get_parent, 0);
rb_define_method(klass, "child", child, 0);
rb_define_method(klass, "first_element_child", first_element_child, 0);
rb_define_method(klass, "last_element_child", last_element_child, 0);
rb_define_method(klass, "children", children, 0);
rb_define_method(klass, "element_children", element_children, 0);
rb_define_method(klass, "next_sibling", next_sibling, 0);
rb_define_method(klass, "previous_sibling", previous_sibling, 0);
rb_define_method(klass, "next_element", next_element, 0);
rb_define_method(klass, "previous_element", previous_element, 0);
rb_define_method(klass, "node_type", node_type, 0);
rb_define_method(klass, "content", get_content, 0);
rb_define_method(klass, "path", path, 0);
rb_define_method(klass, "key?", key_eh, 1);
rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2);
rb_define_method(klass, "blank?", blank_eh, 0);
rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
rb_define_method(klass, "attribute", attr, 1);
rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
rb_define_method(klass, "namespace", namespace, 0);
rb_define_method(klass, "namespace_definitions", namespace_definitions, 0);
rb_define_method(klass, "namespace_scopes", namespace_scopes, 0);
rb_define_method(klass, "encode_special_chars", encode_special_chars, 1);
rb_define_method(klass, "dup", duplicate_node, -1);
rb_define_method(klass, "unlink", unlink_node, 0);
rb_define_method(klass, "internal_subset", internal_subset, 0);
rb_define_method(klass, "external_subset", external_subset, 0);
rb_define_method(klass, "create_internal_subset", create_internal_subset, 3);
rb_define_method(klass, "create_external_subset", create_external_subset, 3);
rb_define_method(klass, "pointer_id", pointer_id, 0);
rb_define_method(klass, "line", line, 0);
rb_define_method(klass, "native_content=", native_content, 1);
/* private primitives */
rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
rb_define_private_method(klass, "in_context", in_context, 2);
rb_define_private_method(klass, "add_child_node", add_child, 1);
rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
rb_define_private_method(klass, "add_next_sibling_node", add_next_sibling, 1);
rb_define_private_method(klass, "replace_node", replace, 1);
rb_define_private_method(klass, "dump_html", dump_html, 0);
rb_define_private_method(klass, "native_write_to", native_write_to, 4);
rb_define_private_method(klass, "get", get, 1);
rb_define_private_method(klass, "set", set, 2);
rb_define_private_method(klass, "set_namespace", set_namespace, 1);
rb_define_private_method(klass, "compare", compare, 1);
/* method ids looked up once at load time */
decorate = rb_intern("decorate");
decorate_bang = rb_intern("decorate!");
}
/* vim: set noet sw=4 sws=4 */
nokogiri-1.6.1/ext/nokogiri/xslt_stylesheet.c 0000644 0001750 0001750 00000016402 12261213762 020753 0 ustar boutil boutil #include
#include
#include
#include
#include
VALUE xslt;
int vasprintf (char **strp, const char *fmt, va_list ap);
void vasprintf_free (void *p);
/*
 * GC mark callback for a wrapped stylesheet: marks the wrapper's
 * func_instances value so the garbage collector keeps it alive.
 */
static void mark(nokogiriXsltStylesheetTuple *wrapper)
{
rb_gc_mark(wrapper->func_instances);
}
/*
 * GC free callback for a wrapped stylesheet: frees the libxslt stylesheet
 * held by the wrapper, then the wrapper struct itself.
 */
static void dealloc(nokogiriXsltStylesheetTuple *wrapper)
{
xsltStylesheetPtr doc = wrapper->ss;
NOKOGIRI_DEBUG_START(doc);
xsltFreeStylesheet(doc); /* frees the libxslt stylesheet held by this wrapper */
NOKOGIRI_DEBUG_END(doc);
free(wrapper);
}
static void xslt_generic_error_handler(void * ctx, const char *msg, ...)
{
char * message;
va_list args;
va_start(args, msg);
vasprintf(&message, msg, args);
va_end(args);
rb_str_cat2((VALUE)ctx, message);
vasprintf_free(message);
}
/*
 * Wrap the libxslt stylesheet +ss+ in a new Nokogiri::XSLT::Stylesheet
 * Ruby object. The Ruby object is stored in ss->_private, and the wrapper
 * starts with an empty func_instances array.
 */
VALUE Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
{
  VALUE rb_stylesheet;
  nokogiriXsltStylesheetTuple *tuple;

  rb_stylesheet = Data_Make_Struct(
                    cNokogiriXsltStylesheet,
                    nokogiriXsltStylesheetTuple,
                    mark,
                    dealloc,
                    tuple
                  );

  /* link both directions: C stylesheet -> Ruby object, wrapper -> C stylesheet */
  ss->_private = (void *)rb_stylesheet;
  tuple->ss = ss;
  tuple->func_instances = rb_ary_new();

  return rb_stylesheet;
}
/*
 * call-seq:
 *   parse_stylesheet_doc(document)
 *
 * Parse a stylesheet from +document+. The stylesheet is built from a deep
 * copy of the document. Raises RuntimeError carrying the collected libxslt
 * error text when parsing fails.
 */
static VALUE parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
{
  xmlDocPtr source_doc, private_copy;
  xsltStylesheetPtr stylesheet;
  VALUE messages;

  Data_Get_Struct(xmldocobj, xmlDoc, source_doc);
  exsltRegisterAll();

  /* gather libxslt error output into a Ruby string while parsing */
  messages = rb_str_new(0, 0);
  xsltSetGenericErrorFunc((void *)messages, xslt_generic_error_handler);

  private_copy = xmlCopyDoc(source_doc, 1); /* 1 => recursive */
  stylesheet = xsltParseStylesheetDoc(private_copy);

  xsltSetGenericErrorFunc(NULL, NULL);

  if (stylesheet == NULL) {
    /* the copy was not adopted by a stylesheet, so it is freed here */
    xmlFreeDoc(private_copy);
    rb_exc_raise(rb_exc_new3(rb_eRuntimeError, messages));
  }

  return Nokogiri_wrap_xslt_stylesheet(stylesheet);
}
/*
 * call-seq:
 *   serialize(document)
 *
 * Serialize +document+ to an xml string, using this stylesheet's output
 * settings via xsltSaveResultToString.
 */
static VALUE serialize(VALUE self, VALUE xmlobj)
{
  xmlDocPtr c_doc;
  nokogiriXsltStylesheetTuple *tuple;
  xmlChar *c_text;
  int c_length;
  VALUE rb_text;

  Data_Get_Struct(xmlobj, xmlDoc, c_doc);
  Data_Get_Struct(self, nokogiriXsltStylesheetTuple, tuple);

  xsltSaveResultToString(&c_text, &c_length, c_doc, tuple->ss);

  /* copy into a Ruby string, then release the libxml2 buffer */
  rb_text = NOKOGIRI_STR_NEW(c_text, c_length);
  xmlFree(c_text);

  return rb_text;
}
/*
 * Error callback that intentionally does nothing: it is installed as the
 * libxml2 generic error handler so that reported messages are discarded.
 */
static void swallow_superfluous_xml_errors(void * userdata, xmlErrorPtr error, ...)
{
}
/*
 *  call-seq:
 *    transform(document, params = [])
 *
 *  Apply an XSLT stylesheet to an XML::Document.
 *  +params+ is an array of strings used as XSLT parameters; a Hash is also
 *  accepted and is flattened into such an array.
 *  returns Nokogiri::XML::Document
 *
 *  Raises ArgumentError when +document+ is not a Nokogiri::XML::Document,
 *  and RuntimeError carrying the collected libxslt error text when the
 *  transformation reports errors.
 *
 *  Example:
 *
 *    doc   = Nokogiri::XML(File.read(ARGV[0]))
 *    xslt  = Nokogiri::XSLT(File.read(ARGV[1]))
 *    puts xslt.transform(doc, ['key', 'value'])
 *
 */
static VALUE transform(int argc, VALUE* argv, VALUE self)
{
  VALUE xmldoc, paramobj, errstr, exception ;
  xmlDocPtr xml ;
  xmlDocPtr result ;
  nokogiriXsltStylesheetTuple *wrapper;
  const char** params ;
  long param_len, j ;
  int parse_error_occurred ;

  /* one required argument (the document), one optional (the params) */
  rb_scan_args(argc, argv, "11", &xmldoc, &paramobj);
  if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; }
  if (!rb_obj_is_kind_of(xmldoc, cNokogiriXmlDocument))
    rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");

  /* handle hashes as arguments. */
  if(T_HASH == TYPE(paramobj)) {
    paramobj = rb_funcall(paramobj, rb_intern("to_a"), 0);
    paramobj = rb_funcall(paramobj, rb_intern("flatten"), 0);
  }

  Check_Type(paramobj, T_ARRAY);

  Data_Get_Struct(xmldoc, xmlDoc, xml);
  Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);

  /* libxslt expects a NULL-terminated array of C strings */
  param_len = RARRAY_LEN(paramobj);
  params = calloc((size_t)param_len+1, sizeof(char*));
  for (j = 0 ; j < param_len ; j++) {
    VALUE entry = rb_ary_entry(paramobj, j);
    const char * ptr = StringValuePtr(entry);
    params[j] = ptr;
  }
  params[param_len] = 0 ;

  /* collect libxslt messages in errstr; discard libxml2 generic messages */
  errstr = rb_str_new(0, 0);
  xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
  xmlSetGenericErrorFunc(NULL, (xmlGenericErrorFunc)&swallow_superfluous_xml_errors);

  result = xsltApplyStylesheet(wrapper->ss, xml, params);
  free(params);

  xsltSetGenericErrorFunc(NULL, NULL);
  xmlSetGenericErrorFunc(NULL, NULL);

  /* any collected error text counts as a failed transformation */
  parse_error_occurred = (Qfalse == rb_funcall(errstr, rb_intern("empty?"), 0));

  if (parse_error_occurred) {
    exception = rb_exc_new3(rb_eRuntimeError, errstr);
    rb_exc_raise(exception);
  }

  return Nokogiri_wrap_xml_document((VALUE)0, result) ;
}
/*
 * XPath extension-function trampoline. Looks up the handler stored as
 * extension data for the current function's namespace URI and forwards the
 * call, by function name, to Nokogiri_marshal_xpath_funcall_and_return_values.
 */
static void method_caller(xmlXPathParserContextPtr ctxt, int nargs)
{
  xsltTransformContextPtr transform_ctx;
  VALUE handler;
  const char *function_name;

  transform_ctx = xsltXPathGetTransformContext(ctxt);

  /* the handler was registered as extension data under the function's URI */
  handler = (VALUE)xsltGetExtData(transform_ctx, ctxt->context->functionURI);
  function_name = (const char *)(ctxt->context->function);

  Nokogiri_marshal_xpath_funcall_and_return_values(
    ctxt, nargs, handler, (const char *)function_name
  );
}
/*
 * Extension-module init callback. Looks up the object registered for
 * +uri+ in XSLT's @modules hash, registers each name returned by its
 * instance_methods(false) as an extension function, then creates an
 * instance of it. The instance is stored in the stylesheet wrapper's
 * func_instances array and returned as the module's extension data.
 */
static void * initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
{
  VALUE registry = rb_iv_get(xslt, "@modules");
  VALUE handler_class = rb_hash_aref(registry, rb_str_new2((const char *)uri));
  VALUE include_inherited = Qfalse;
  VALUE method_names = rb_funcall(handler_class, rb_intern("instance_methods"), 1, include_inherited);
  VALUE instance;
  nokogiriXsltStylesheetTuple *tuple;
  int index;

  for (index = 0; index < RARRAY_LEN(method_names); index++) {
    VALUE method_name = rb_obj_as_string(rb_ary_entry(method_names, index));

    xsltRegisterExtFunction(
      ctxt,
      (unsigned char *)StringValuePtr(method_name),
      uri,
      method_caller
    );
  }

  Data_Get_Struct(ctxt->style->_private, nokogiriXsltStylesheetTuple, tuple);

  instance = rb_class_new_instance(0, NULL, handler_class);
  rb_ary_push(tuple->func_instances, instance);

  return (void *)instance;
}
/*
 * Extension-module shutdown callback: empties the func_instances array of
 * the stylesheet wrapper belonging to this transform's stylesheet.
 */
static void shutdownFunc(xsltTransformContextPtr ctxt,
                         const xmlChar *uri, void *data)
{
  nokogiriXsltStylesheetTuple *tuple;

  Data_Get_Struct(ctxt->style->_private, nokogiriXsltStylesheetTuple, tuple);

  rb_ary_clear(tuple->func_instances);
}
/*
 *  call-seq:
 *    register(uri, custom_handler_class)
 *
 *  Register a class that implements custom XSLT transformation functions.
 */
static VALUE registr(VALUE self, VALUE uri, VALUE obj)
{
  VALUE registry = rb_iv_get(self, "@modules");

  if (NIL_P(registry)) {
    rb_raise(rb_eRuntimeError, "wtf! @modules isn't set");
  }

  /* remember the handler under its URI, then tell libxslt about the module */
  rb_hash_aset(registry, uri, obj);
  xsltRegisterExtModule(
    (unsigned char *)StringValuePtr(uri),
    initFunc,
    shutdownFunc
  );

  return self;
}
VALUE cNokogiriXsltStylesheet ;
/*
 * Define Nokogiri::XSLT and Nokogiri::XSLT::Stylesheet, initialize the
 * XSLT module's @modules hash, and bind this file's C functions to their
 * Ruby method names.
 */
void init_xslt_stylesheet()
{
VALUE nokogiri;
VALUE klass;
nokogiri = rb_define_module("Nokogiri");
xslt = rb_define_module_under(nokogiri, "XSLT");
klass = rb_define_class_under(xslt, "Stylesheet", rb_cObject);
/* @modules maps a namespace URI to the object registered for it */
rb_iv_set(xslt, "@modules", rb_hash_new());
cNokogiriXsltStylesheet = klass;
rb_define_singleton_method(klass, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
/* "register" is defined on the XSLT module itself, not on Stylesheet */
rb_define_singleton_method(xslt, "register", registr, 2);
rb_define_method(klass, "serialize", serialize, 1);
rb_define_method(klass, "transform", transform, -1);
}
nokogiri-1.6.1/ext/nokogiri/xml_processing_instruction.c 0000644 0001750 0001750 00000002466 12261213762 023212 0 ustar boutil boutil #include
/*
 * call-seq:
 *  new(document, name, content)
 *
 * Create a new ProcessingInstruction element on the +document+ with +name+
 * and +content+. Any further arguments are passed on to #initialize; the
 * new node is yielded when a block is given.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE document, name, content, rest;
  VALUE rb_node;
  xmlDocPtr c_doc;
  xmlNodePtr c_node;

  rb_scan_args(argc, argv, "3*", &document, &name, &content, &rest);

  Data_Get_Struct(document, xmlDoc, c_doc);

  c_node = xmlNewDocPI(
             c_doc,
             (const xmlChar *)StringValuePtr(name),
             (const xmlChar *)StringValuePtr(content)
           );

  nokogiri_root_node(c_node);

  rb_node = Nokogiri_wrap_xml_node(klass, c_node);
  rb_obj_call_init(rb_node, argc, argv);

  if (rb_block_given_p()) {
    rb_yield(rb_node);
  }

  return rb_node;
}
VALUE cNokogiriXmlProcessingInstruction;

/* Defines Nokogiri::XML::ProcessingInstruction (a Node subclass) and its constructor. */
void init_xml_processing_instruction()
{
  VALUE mNokogiri = rb_define_module("Nokogiri");
  VALUE mXml      = rb_define_module_under(mNokogiri, "XML");
  VALUE cNode     = rb_define_class_under(mXml, "Node", rb_cObject);

  /*
   * ProcessingInstruction represents a ProcessingInstruction node in an xml
   * document.
   */
  VALUE cPI = rb_define_class_under(mXml, "ProcessingInstruction", cNode);

  cNokogiriXmlProcessingInstruction = cPI;

  rb_define_singleton_method(cPI, "new", new, -1);
}
nokogiri-1.6.1/ext/nokogiri/html_sax_push_parser.h 0000644 0001750 0001750 00000000270 12261213762 021743 0 ustar boutil boutil #ifndef NOKOGIRI_HTML_SAX_PUSH_PARSER
#define NOKOGIRI_HTML_SAX_PUSH_PARSER
#include
void init_html_sax_push_parser();
extern VALUE cNokogiriHtmlSaxPushParser ;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_document_fragment.c 0000644 0001750 0001750 00000002121 12261213762 022062 0 ustar boutil boutil #include
/*
 * call-seq:
 *  new(document)
 *
 * Create a new DocumentFragment element on the +document+. Any extra
 * arguments are forwarded to #initialize; the new node is yielded when a
 * block is given.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE document, rest;
  VALUE rb_fragment;
  xmlDocPtr c_doc;
  xmlNodePtr c_fragment;

  rb_scan_args(argc, argv, "1*", &document, &rest);
  Data_Get_Struct(document, xmlDoc, c_doc);

  c_fragment = xmlNewDocFragment(c_doc->doc);

  /* register the parentless node with its document before wrapping it */
  nokogiri_root_node(c_fragment);

  rb_fragment = Nokogiri_wrap_xml_node(klass, c_fragment);
  rb_obj_call_init(rb_fragment, argc, argv);

  if(rb_block_given_p()) {
    rb_yield(rb_fragment);
  }

  return rb_fragment;
}
VALUE cNokogiriXmlDocumentFragment;

/* Defines Nokogiri::XML::DocumentFragment (a Node subclass) and its constructor. */
void init_xml_document_fragment()
{
  VALUE mNokogiri = rb_define_module("Nokogiri");
  VALUE mXml      = rb_define_module_under(mNokogiri, "XML");
  VALUE cNode     = rb_define_class_under(mXml, "Node", rb_cObject);

  /*
   * DocumentFragment represents a DocumentFragment node in an xml document.
   */
  VALUE cFragment = rb_define_class_under(mXml, "DocumentFragment", cNode);

  cNokogiriXmlDocumentFragment = cFragment;

  rb_define_singleton_method(cFragment, "new", new, -1);
}
nokogiri-1.6.1/ext/nokogiri/xml_io.c 0000644 0001750 0001750 00000002132 12261213762 016772 0 ustar boutil boutil #include
static ID id_read, id_write;
/*
 * Body run under rb_rescue by io_read_callback: calls read(length) on the
 * IO object. args[0] is the receiver, args[1] the requested length.
 */
VALUE read_check(VALUE *args)
{
  VALUE io     = args[0];
  VALUE length = args[1];

  return rb_funcall(io, id_read, 1, length);
}
/*
 * Rescue handler paired with read_check: a read that raised is reported
 * to the caller as nil.
 */
VALUE read_failed(void)
{
  return Qnil;
}
int io_read_callback(void * ctx, char * buffer, int len) {
VALUE string, args[2];
size_t str_len, safe_len;
args[0] = (VALUE)ctx;
args[1] = INT2NUM(len);
string = rb_rescue(read_check, (VALUE)args, read_failed, 0);
if(NIL_P(string)) return 0;
str_len = (size_t)RSTRING_LEN(string);
safe_len = str_len > (size_t)len ? (size_t)len : str_len;
memcpy(buffer, StringValuePtr(string), safe_len);
return (int)safe_len;
}
VALUE write_check(VALUE *args) {
return rb_funcall(args[0], id_write, 1, args[1]);
}
VALUE write_failed(void) {
return Qnil;
}
int io_write_callback(void * ctx, char * buffer, int len) {
VALUE args[2];
args[0] = (VALUE)ctx;
args[1] = rb_str_new(buffer, (long)len);
rb_rescue(write_check, (VALUE)args, write_failed, 0);
return len;
}
/*
 * Close callback paired with the read/write callbacks above. It does
 * nothing with +ctx+ and always reports success (0).
 */
int io_close_callback(void * ctx)
{
  (void)ctx;

  return 0;
}
/* Interns the method names ("read", "write") used by the IO callbacks. */
void init_nokogiri_io()
{
  id_write = rb_intern("write");
  id_read  = rb_intern("read");
}
nokogiri-1.6.1/ext/nokogiri/html_document.h 0000644 0001750 0001750 00000000237 12261213762 020356 0 ustar boutil boutil #ifndef NOKOGIRI_HTML_DOCUMENT
#define NOKOGIRI_HTML_DOCUMENT
#include
void init_html_document();
extern VALUE cNokogiriHtmlDocument ;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_attribute_decl.c 0000644 0001750 0001750 00000002624 12261213762 021363 0 ustar boutil boutil #include
/*
 * call-seq:
 *  attribute_type
 *
 * The attribute_type for this AttributeDecl, as an Integer
 */
static VALUE attribute_type(VALUE self)
{
  xmlAttributePtr decl;
  long atype;

  Data_Get_Struct(self, xmlAttribute, decl);
  atype = (long)decl->atype;

  return INT2NUM(atype);
}
/*
 * call-seq:
 *  default
 *
 * The default value, or nil when the declaration has none
 */
static VALUE default_value(VALUE self)
{
  xmlAttributePtr decl;

  Data_Get_Struct(self, xmlAttribute, decl);

  if(!decl->defaultValue) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(decl->defaultValue);
}
/*
 * call-seq:
 *  enumeration
 *
 * An enumeration of possible values, returned as an Array of Strings
 * (empty when the declaration carries no enumeration)
 */
static VALUE enumeration(VALUE self)
{
  xmlAttributePtr decl;
  xmlEnumerationPtr item;
  VALUE values = rb_ary_new();

  Data_Get_Struct(self, xmlAttribute, decl);

  /* walk the linked list hanging off the declaration */
  for(item = decl->tree; item; item = item->next) {
    rb_ary_push(values, NOKOGIRI_STR_NEW2(item->name));
  }

  return values;
}
VALUE cNokogiriXmlAttributeDecl;

/* Defines Nokogiri::XML::AttributeDecl (a Node subclass) and its readers. */
void init_xml_attribute_decl()
{
  VALUE mNokogiri = rb_define_module("Nokogiri");
  VALUE mXml      = rb_define_module_under(mNokogiri, "XML");
  VALUE cNode     = rb_define_class_under(mXml, "Node", rb_cObject);
  VALUE cDecl     = rb_define_class_under(mXml, "AttributeDecl", cNode);

  cNokogiriXmlAttributeDecl = cDecl;

  rb_define_method(cDecl, "attribute_type", attribute_type, 0);
  rb_define_method(cDecl, "default", default_value, 0);
  rb_define_method(cDecl, "enumeration", enumeration, 0);
}
nokogiri-1.6.1/ext/nokogiri/xml_attr.h 0000644 0001750 0001750 00000000211 12261213762 017336 0 ustar boutil boutil #ifndef NOKOGIRI_XML_ATTR
#define NOKOGIRI_XML_ATTR
#include
void init_xml_attr();
extern VALUE cNokogiriXmlAttr;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_relax_ng.h 0000644 0001750 0001750 00000000230 12261213762 020164 0 ustar boutil boutil #ifndef NOKOGIRI_XML_RELAX_NG
#define NOKOGIRI_XML_RELAX_NG
#include
void init_xml_relax_ng();
extern VALUE cNokogiriXmlRelaxNG;
#endif
nokogiri-1.6.1/ext/nokogiri/xslt_stylesheet.h 0000644 0001750 0001750 00000000444 12261213762 020757 0 ustar boutil boutil #ifndef NOKOGIRI_XSLT_STYLESHEET
#define NOKOGIRI_XSLT_STYLESHEET
#include
void init_xslt_stylesheet();
extern VALUE cNokogiriXsltStylesheet ;
/* Pairs a libxslt stylesheet with a Ruby array of extension-function objects. */
typedef struct _nokogiriXsltStylesheetTuple {
/* the libxslt stylesheet handle */
xsltStylesheetPtr ss;
/* Ruby array; the extension-module callbacks in xslt_stylesheet.c push
 * instances onto it and clear it on shutdown */
VALUE func_instances;
} nokogiriXsltStylesheetTuple;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_syntax_error.h 0000644 0001750 0001750 00000000564 12261213762 021136 0 ustar boutil boutil #ifndef NOKOGIRI_XML_SYNTAX_ERROR
#define NOKOGIRI_XML_SYNTAX_ERROR
#include
void init_xml_syntax_error();
VALUE Nokogiri_wrap_xml_syntax_error(VALUE klass, xmlErrorPtr error);
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error);
NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error));
extern VALUE cNokogiriXmlSyntaxError;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_document.c 0000644 0001750 0001750 00000035037 12261213762 020213 0 ustar boutil boutil #include
/*
 * st_foreach callback used by dealloc on the document's unlinked-node table.
 * Attribute nodes and namespace declarations are freed on the spot; any
 * other node that has no parent is attached to +doc+. Always continues
 * the iteration. +key+ is not used.
 */
static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
{
  if(node->type == XML_ATTRIBUTE_NODE) {
    xmlFreePropList((xmlAttrPtr)node);
  }
  else if(node->type == XML_NAMESPACE_DECL) {
    xmlFree(node);
  }
  else if(node->parent == NULL) {
    xmlAddChild((xmlNodePtr)doc, node);
  }

  return ST_CONTINUE;
}
/*
 * Free function installed by Nokogiri_wrap_xml_document. Tears down the
 * bookkeeping stored in doc->_private and then frees the libxml2 document.
 */
static void dealloc(xmlDocPtr doc)
{
xmlDeregisterNodeFunc func;
st_table *node_hash;
NOKOGIRI_DEBUG_START(doc);
/* Install a NULL deregister-node callback for the duration of the teardown,
 * keeping the previously installed one so it can be put back below. */
func = xmlDeregisterNodeDefault(NULL);
node_hash = DOC_UNLINKED_NODE_HASH(doc);
/* Visit every entry of the unlinked-node table (see dealloc_node_i), then
 * release the table itself. */
st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
st_free_table(node_hash);
/* _private holds the malloc'ed tuple set up in Nokogiri_wrap_xml_document */
free(doc->_private);
doc->_private = NULL;
xmlFreeDoc(doc);
/* restore the deregister-node callback saved above */
xmlDeregisterNodeDefault(func);
NOKOGIRI_DEBUG_END(doc);
}
/*
 * Strips namespace information from +node+ and everything beneath it:
 * clears the node's namespace, recurses into the children, frees the
 * namespace definitions carried by element / XInclude nodes, and clears the
 * namespace pointer of every attribute on element nodes.
 */
static void recursively_remove_namespaces_from_node(xmlNodePtr node)
{
  xmlNodePtr kid;
  xmlAttrPtr attr;

  xmlSetNs(node, NULL);

  for(kid = node->children; kid != NULL; kid = kid->next) {
    recursively_remove_namespaces_from_node(kid);
  }

  switch(node->type) {
    case XML_ELEMENT_NODE:
    case XML_XINCLUDE_START:
    case XML_XINCLUDE_END:
      if(node->nsDef) {
        xmlFreeNsList(node->nsDef);
        node->nsDef = NULL;
      }
      break;
    default:
      break;
  }

  if(node->type == XML_ELEMENT_NODE) {
    for(attr = node->properties; attr != NULL; attr = attr->next) {
      attr->ns = NULL;
    }
  }
}
/*
 *  call-seq:
 *    url
 *
 *  Get the url name for this document, or nil when none is set.
 */
static VALUE url(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if(!c_doc->URL) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(c_doc->URL);
}
/*
 *  call-seq:
 *    root=
 *
 *  Set the root element on this document. Passing nil unlinks the current
 *  root element, if there is one. Returns the argument.
 */
static VALUE set_root(VALUE self, VALUE root)
{
  xmlDocPtr doc;
  xmlNodePtr incoming;
  xmlNodePtr displaced = NULL;

  Data_Get_Struct(self, xmlDoc, doc);

  if(!NIL_P(root)) {
    Data_Get_Struct(root, xmlNode, incoming);

    /* A node that belongs to another document has to be copied into this
     * one; remember the root that is about to be replaced. */
    if(incoming->doc != doc) {
      displaced = xmlDocGetRootElement(doc);
      incoming  = xmlDocCopyNode(incoming, doc, 1);
      if(incoming == NULL) {
        rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
      }
    }

    xmlDocSetRootElement(doc, incoming);
    if(displaced) {
      nokogiri_root_node(displaced);
    }

    return root;
  }

  /* nil: detach whatever root element the document currently has */
  displaced = xmlDocGetRootElement(doc);
  if(displaced) {
    xmlUnlinkNode(displaced);
    nokogiri_root_node(displaced);
  }

  return root;
}
/*
 *  call-seq:
 *    root
 *
 *  Get the root node for this document, or nil when it has none.
 */
static VALUE root(VALUE self)
{
  xmlDocPtr c_doc;
  xmlNodePtr c_root;

  Data_Get_Struct(self, xmlDoc, c_doc);
  c_root = xmlDocGetRootElement(c_doc);

  return c_root ? Nokogiri_wrap_xml_node(Qnil, c_root) : Qnil;
}
/*
 *  call-seq:
 *    encoding= encoding
 *
 *  Set the encoding string for this Document. Raises TypeError when
 *  +encoding+ cannot be converted to a String; in that case the document's
 *  current encoding is left untouched. Returns +encoding+.
 */
static VALUE set_encoding(VALUE self, VALUE encoding)
{
  xmlDocPtr doc;
  xmlChar *new_encoding;

  Data_Get_Struct(self, xmlDoc, doc);

  /* Convert and copy first: StringValuePtr may raise, and the old value must
   * not have been released yet when that happens. */
  new_encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));

  /* The stored string is a libxml2 allocation (xmlStrdup here), so it is
   * released with xmlFree rather than free. */
  if (doc->encoding)
    xmlFree((xmlChar *)doc->encoding);

  doc->encoding = new_encoding;

  return encoding;
}
/*
 *  call-seq:
 *    encoding
 *
 *  Get the encoding for this Document, or nil when none is set.
 */
static VALUE encoding(VALUE self)
{
  xmlDocPtr c_doc;
  VALUE result = Qnil;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if(c_doc->encoding) {
    result = NOKOGIRI_STR_NEW2(c_doc->encoding);
  }

  return result;
}
/*
 *  call-seq:
 *    version
 *
 *  Get the XML version for this Document, or nil when none is set.
 */
static VALUE version(VALUE self)
{
  xmlDocPtr c_doc;
  VALUE result = Qnil;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if(c_doc->version) {
    result = NOKOGIRI_STR_NEW2(c_doc->version);
  }

  return result;
}
/*
 *  call-seq:
 *    read_io(io, url, encoding, options)
 *
 *  Create a new document from an IO object. +url+ and +encoding+ may be nil;
 *  +options+ is the Integer parse-option mask handed to libxml2. Errors
 *  reported while parsing are collected into the document's @errors. Raises
 *  a syntax error (or RuntimeError when libxml2 recorded no error) if no
 *  document could be produced.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
{
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  /* Convert before the error handler is installed: NUM2INT can raise, and
   * raising with the handler set would leave it pointing at error_list. */
  int c_options         = (int)NUM2INT(options);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = xmlReadIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)io,
      c_url,
      c_enc,
      c_options
  );
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* nothing to free here: there is no document */
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
/*
 *  call-seq:
 *    read_memory(string, url, encoding, options)
 *
 *  Create a new document from a String. +url+ and +encoding+ may be nil;
 *  +options+ is the Integer parse-option mask handed to libxml2. Errors
 *  reported while parsing are collected into the document's @errors. Raises
 *  a syntax error (or RuntimeError when libxml2 recorded no error) if no
 *  document could be produced.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
  const char * c_buffer = StringValuePtr(string);
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  int len               = (int)RSTRING_LEN(string);
  /* Convert before the error handler is installed: NUM2INT can raise, and
   * raising with the handler set would leave it pointing at error_list. */
  int c_options         = (int)NUM2INT(options);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
  doc = xmlReadMemory(c_buffer, len, c_url, c_enc, c_options);
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* nothing to free here: there is no document */
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
/*
 *  call-seq:
 *    dup
 *
 *  Copy this Document.  An optional depth may be passed in, but it defaults
 *  to a deep copy.  0 is a shallow copy, 1 is a deep copy. Returns nil when
 *  libxml2 could not produce the copy.
 */
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
{
  xmlDocPtr original, copy;
  VALUE level;
  int given;

  given = rb_scan_args(argc, argv, "01", &level);
  if(given == 0) {
    level = INT2NUM((long)1);
  }

  Data_Get_Struct(self, xmlDoc, original);

  copy = xmlCopyDoc(original, (int)NUM2INT(level));
  if(!copy) {
    return Qnil;
  }

  /* carry the document type over to the copy */
  copy->type = original->type;

  return Nokogiri_wrap_xml_document(rb_obj_class(self), copy);
}
/*
 *  call-seq:
 *    new(version = default)
 *
 *  Create a new document with +version+ (defaults to "1.0"). All arguments
 *  are also forwarded to #initialize.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE rest, rb_version, rb_doc;
  xmlDocPtr c_doc;

  rb_scan_args(argc, argv, "0*", &rest);

  /* the first positional argument, when present, is the version */
  rb_version = rb_ary_entry(rest, (long)0);
  if(NIL_P(rb_version)) {
    rb_version = rb_str_new2("1.0");
  }

  c_doc  = xmlNewDoc((xmlChar *)StringValuePtr(rb_version));
  rb_doc = Nokogiri_wrap_xml_document(klass, c_doc);
  rb_obj_call_init(rb_doc, argc, argv);

  return rb_doc;
}
/*
* call-seq:
* remove_namespaces!
*
* Remove all namespaces from all nodes in the document.
*
* This could be useful for developers who either don't understand namespaces
* or don't care about them.
*
* The following example shows a use case, and you can decide for yourself
* whether this is a good thing or not:
*
* doc = Nokogiri::XML <<-EOXML
* <root>
* <car xmlns:part="http://general-motors.com/">
* <part:tire>Michelin Model XGV</part:tire>
* </car>
* <bicycle xmlns:part="http://schwinn.com/">
* <part:tire>I'm a bicycle tire!</part:tire>
* </bicycle>
* </root>
* EOXML
*
* doc.xpath("//tire").to_s # => ""
* doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
* doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
*
* doc.remove_namespaces!
*
* doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
* doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
* doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
*
* For more information on why this probably is *not* a good thing in general,
* please direct your browser to
* http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
*/
VALUE remove_namespaces_bang(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  /* start at the document node itself and walk the whole tree */
  recursively_remove_namespaces_from_node((xmlNodePtr)c_doc);

  return self;
}
/* call-seq: doc.create_entity(name, type, external_id, system_id, content)
 *
 * Create a new entity named +name+.
 *
 * +type+ is an integer representing the type of entity to be created, and it
 * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL.  See
 * the constants on Nokogiri::XML::EntityDecl for more information.
 *
 * +external_id+, +system_id+, and +content+ set the External ID, System ID,
 * and content respectively.  All of these parameters are optional.
 *
 * Raises a syntax error (or RuntimeError when libxml2 recorded no error) if
 * the entity could not be created.
 */
static VALUE create_entity(int argc, VALUE *argv, VALUE self)
{
  VALUE name, type, external_id, system_id, content;
  xmlChar *c_name, *c_external_id, *c_system_id, *c_content;
  int c_type;
  xmlEntityPtr entity;
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
      &content);

  xmlResetLastError();

  /* nil arguments map to NULL / the default entity type */
  c_name        = (xmlChar *)(NIL_P(name)        ? NULL : StringValuePtr(name));
  c_type        = (int)      (NIL_P(type)        ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type));
  c_external_id = (xmlChar *)(NIL_P(external_id) ? NULL : StringValuePtr(external_id));
  c_system_id   = (xmlChar *)(NIL_P(system_id)   ? NULL : StringValuePtr(system_id));
  c_content     = (xmlChar *)(NIL_P(content)     ? NULL : StringValuePtr(content));

  entity = xmlAddDocEntity(doc, c_name, c_type, c_external_id, c_system_id,
      c_content);

  if(entity != NULL) {
    return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)entity);
  }

  {
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not create entity");
  }

  return Qnil;
}
/*
 * Visibility callback handed to xmlC14NExecute by canonicalize. +ctx+ is the
 * Ruby proc; it is called with the wrapped node (or namespace) and the
 * wrapped parent (nil when there is none). Returns 0 when the proc answers
 * nil or false, 1 otherwise.
 */
static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
{
  VALUE rb_block = (VALUE)ctx;
  VALUE rb_obj;
  VALUE rb_parent;
  VALUE verdict;

  rb_obj = (_node->type == XML_NAMESPACE_DECL)
    ? Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node)
    : Nokogiri_wrap_xml_node(Qnil, _node);

  rb_parent = Qnil;
  if(_parent) {
    rb_parent = Nokogiri_wrap_xml_node(Qnil, _parent);
  }

  verdict = rb_funcall(rb_block, rb_intern("call"), 2, rb_obj, rb_parent);

  return RTEST(verdict) ? 1 : 0;
}
/* call-seq:
 *  doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
 *  doc.canonicalize { |obj, parent| ... }
 *
 * Canonicalize a document and return the results.  Takes an optional block
 * that takes two parameters: the +obj+ and that node's +parent+.
 * The  +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
 * The block must return a non-nil, non-false value if the +obj+ passed in
 * should be included in the canonicalized document.
 *
 * +inclusive_namespaces+, when given, must be an Array of Strings (TypeError
 * otherwise). Comments are kept only when +with_comments+ is neither nil
 * nor false.
 */
static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
{
  VALUE mode;
  VALUE incl_ns;
  VALUE with_comments;
  VALUE rb_cStringIO;
  VALUE io;
  VALUE ns_strings = Qnil;
  xmlChar **ns = NULL;
  long ns_len, i;
  int c_mode;
  xmlDocPtr doc;
  xmlOutputBufferPtr buf;
  xmlC14NIsVisibleCallback cb = NULL;
  void * ctx = NULL;

  rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
  Data_Get_Struct(self, xmlDoc, doc);

  /* Do every conversion that can raise before allocating native memory, so
   * a TypeError cannot leak the prefix array or the output buffer. */
  c_mode = (int)(NIL_P(mode) ? 0 : NUM2INT(mode));

  if(!NIL_P(incl_ns)) {
    Check_Type(incl_ns, T_ARRAY);
    ns_len = RARRAY_LEN(incl_ns);
    /* coerced copies live in a Ruby array so the GC owns them */
    ns_strings = rb_ary_new2(ns_len);
    for (i = 0 ; i < ns_len ; i++) {
      VALUE entry = rb_ary_entry(incl_ns, i);
      StringValue(entry);
      rb_ary_push(ns_strings, entry);
    }
  }

  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
  io           = rb_class_new_instance(0, 0, rb_cStringIO);

  if(rb_block_given_p()) {
    cb = block_caller;
    ctx = (void *)rb_block_proc();
  }

  if(!NIL_P(ns_strings)) {
    /* NULL-terminated list of prefixes; calloc zeroes the terminator */
    ns_len = RARRAY_LEN(ns_strings);
    ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
    if(ns == NULL) {
      rb_raise(rb_eNoMemError, "failed to allocate namespace prefix list");
    }
    for (i = 0 ; i < ns_len ; i++) {
      ns[i] = (xmlChar*) RSTRING_PTR(rb_ary_entry(ns_strings, i));
    }
  }

  buf = xmlAllocOutputBuffer(NULL);
  if(buf == NULL) {
    free(ns);
    rb_raise(rb_eNoMemError, "failed to allocate output buffer");
  }

  /* route libxml2's output into the StringIO */
  buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
  buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
  buf->context       = (void *)io;

  /* NOTE(review): an exception raised by the visibility block would still
   * unwind past this point and skip the cleanup below - confirm whether the
   * call should be wrapped in rb_protect. */
  xmlC14NExecute(doc, cb, ctx,
    c_mode,
    ns,
    (int) (RTEST(with_comments) ? 1 : 0),
    buf);

  xmlOutputBufferClose(buf);
  free(ns);

  /* keep the coerced strings alive for as long as ns pointed into them */
  RB_GC_GUARD(ns_strings);

  return rb_funcall(io, rb_intern("string"), 0);
}
VALUE cNokogiriXmlDocument ;

/* Defines Nokogiri::XML::Document (a Node subclass) and binds its native methods. */
void init_xml_document()
{
  VALUE mNokogiri = rb_define_module("Nokogiri");
  VALUE mXml      = rb_define_module_under(mNokogiri, "XML");
  VALUE cNode     = rb_define_class_under(mXml, "Node", rb_cObject);

  /*
   * Nokogiri::XML::Document wraps an xml document.
   */
  VALUE cDocument = rb_define_class_under(mXml, "Document", cNode);

  cNokogiriXmlDocument = cDocument;

  /* constructors */
  rb_define_singleton_method(cDocument, "read_memory", read_memory, 4);
  rb_define_singleton_method(cDocument, "read_io", read_io, 4);
  rb_define_singleton_method(cDocument, "new", new, -1);

  /* instance methods */
  rb_define_method(cDocument, "root", root, 0);
  rb_define_method(cDocument, "root=", set_root, 1);
  rb_define_method(cDocument, "encoding", encoding, 0);
  rb_define_method(cDocument, "encoding=", set_encoding, 1);
  rb_define_method(cDocument, "version", version, 0);
  rb_define_method(cDocument, "canonicalize", canonicalize, -1);
  rb_define_method(cDocument, "dup", duplicate_node, -1);
  rb_define_method(cDocument, "url", url, 0);
  rb_define_method(cDocument, "create_entity", create_entity, -1);
  rb_define_method(cDocument, "remove_namespaces!", remove_namespaces_bang, 0);
}
/*
 * Wraps a libxml2 document in a Ruby object and attaches nokogiri's
 * per-document bookkeeping to doc->_private.
 *
 * This takes klass as a param because it's used for HtmlDocument, too; a
 * zero klass falls back to cNokogiriXmlDocument. The returned object has
 * @decorators set to nil and @node_cache set to a fresh array, and
 * #initialize is invoked on it with no arguments.
 */
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
{
/* released with free() in dealloc */
nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
/* no mark function; dealloc is the free function */
VALUE rb_doc = Data_Wrap_Struct(
klass ? klass : cNokogiriXmlDocument,
0,
dealloc,
doc
);
VALUE cache = rb_ary_new();
rb_iv_set(rb_doc, "@decorators", Qnil);
/* the cache array is reachable both from the ivar and from the tuple */
rb_iv_set(rb_doc, "@node_cache", cache);
tuple->doc = rb_doc;
/* table of unlinked nodes that dealloc walks with dealloc_node_i */
tuple->unlinkedNodes = st_init_numtable_with_size(128);
tuple->node_cache = cache;
doc->_private = tuple ;
rb_obj_call_init(rb_doc, 0, NULL);
return rb_doc ;
}
nokogiri-1.6.1/ext/nokogiri/xml_dtd.h 0000644 0001750 0001750 00000000206 12261213762 017143 0 ustar boutil boutil #ifndef NOKOGIRI_XML_DTD
#define NOKOGIRI_XML_DTD
#include
extern VALUE cNokogiriXmlDtd;
void init_xml_dtd();
#endif
nokogiri-1.6.1/ext/nokogiri/xml_comment.c 0000644 0001750 0001750 00000002273 12261213762 020033 0 ustar boutil boutil #include
/*
 * call-seq:
 *  new(document, content)
 *
 * Create a new Comment element on the +document+ with +content+. Any extra
 * arguments are forwarded to #initialize; the new node is yielded when a
 * block is given.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE document, content, rest;
  VALUE rb_comment;
  xmlDocPtr c_doc;
  xmlNodePtr c_comment;

  rb_scan_args(argc, argv, "2*", &document, &content, &rest);
  Data_Get_Struct(document, xmlDoc, c_doc);

  c_comment = xmlNewDocComment(c_doc, (const xmlChar *)StringValuePtr(content));

  rb_comment = Nokogiri_wrap_xml_node(klass, c_comment);
  rb_obj_call_init(rb_comment, argc, argv);

  /* NOTE(review): the node is registered after #initialize has run, whereas
   * the ProcessingInstruction and DocumentFragment constructors register
   * before wrapping - confirm the ordering is intentional. */
  nokogiri_root_node(c_comment);

  if(rb_block_given_p()) {
    rb_yield(rb_comment);
  }

  return rb_comment;
}
VALUE cNokogiriXmlComment;

/* Defines Nokogiri::XML::Comment (a CharacterData subclass) and its constructor. */
void init_xml_comment()
{
  VALUE mNokogiri      = rb_define_module("Nokogiri");
  VALUE mXml           = rb_define_module_under(mNokogiri, "XML");
  VALUE cNode          = rb_define_class_under(mXml, "Node", rb_cObject);
  VALUE cCharacterData = rb_define_class_under(mXml, "CharacterData", cNode);

  /*
   * Comment represents a comment node in an xml document.
   */
  VALUE cComment = rb_define_class_under(mXml, "Comment", cCharacterData);

  cNokogiriXmlComment = cComment;

  rb_define_singleton_method(cComment, "new", new, -1);
}
nokogiri-1.6.1/ext/nokogiri/xml_element_content.c 0000644 0001750 0001750 00000005043 12261213762 021552 0 ustar boutil boutil #include
VALUE cNokogiriXmlElementContent;
/*
 * call-seq:
 *  name
 *
 * Get the required element +name+, or nil when the content has no name
 */
static VALUE get_name(VALUE self)
{
  xmlElementContentPtr content;
  VALUE result = Qnil;

  Data_Get_Struct(self, xmlElementContent, content);

  if(content->name) {
    result = NOKOGIRI_STR_NEW2(content->name);
  }

  return result;
}
/*
 * call-seq:
 *  type
 *
 * Get the element content +type+.  Possible values are PCDATA, ELEMENT, SEQ,
 * or OR.
 */
static VALUE get_type(VALUE self)
{
  xmlElementContentPtr content;
  long type;

  Data_Get_Struct(self, xmlElementContent, content);
  type = (long)content->type;

  return INT2NUM(type);
}
/*
 * call-seq:
 *  c1
 *
 * Get the first child, or nil when there is none.
 */
static VALUE get_c1(VALUE self)
{
  xmlElementContentPtr content;
  VALUE document;

  Data_Get_Struct(self, xmlElementContent, content);

  if(content->c1 == NULL) {
    return Qnil;
  }

  /* the child shares this object's @document */
  document = rb_iv_get(self, "@document");
  return Nokogiri_wrap_element_content(document, content->c1);
}
/*
 * call-seq:
 *  c2
 *
 * Get the second child, or nil when there is none.
 */
static VALUE get_c2(VALUE self)
{
  xmlElementContentPtr content;
  VALUE document;

  Data_Get_Struct(self, xmlElementContent, content);

  if(content->c2 == NULL) {
    return Qnil;
  }

  /* the child shares this object's @document */
  document = rb_iv_get(self, "@document");
  return Nokogiri_wrap_element_content(document, content->c2);
}
/*
 * call-seq:
 *  occur
 *
 * Get the element content +occur+ flag.  Possible values are ONCE, OPT, MULT
 * or PLUS.
 */
static VALUE get_occur(VALUE self)
{
  xmlElementContentPtr content;
  long occur;

  Data_Get_Struct(self, xmlElementContent, content);
  occur = (long)content->ocur;

  return INT2NUM(occur);
}
/*
 * call-seq:
 *  prefix
 *
 * Get the element content namespace +prefix+, or nil when there is none.
 */
static VALUE get_prefix(VALUE self)
{
  xmlElementContentPtr content;
  VALUE result = Qnil;

  Data_Get_Struct(self, xmlElementContent, content);

  if(content->prefix) {
    result = NOKOGIRI_STR_NEW2(content->prefix);
  }

  return result;
}
/*
 * Wraps +element+ in an ElementContent object. No mark or free function is
 * registered for the wrapper.
 */
VALUE Nokogiri_wrap_element_content(VALUE doc, xmlElementContentPtr element)
{
  VALUE wrapped;

  wrapped = Data_Wrap_Struct(cNokogiriXmlElementContent, 0, 0, element);

  /* Setting the document is necessary so that this does not get GC'd until */
  /* the document is GC'd */
  rb_iv_set(wrapped, "@document", doc);

  return wrapped;
}
/*
 * Defines Nokogiri::XML::ElementContent with its public readers and the
 * private c1 / c2 child accessors.
 */
void init_xml_element_content()
{
  VALUE mNokogiri = rb_define_module("Nokogiri");
  VALUE mXml      = rb_define_module_under(mNokogiri, "XML");
  VALUE cContent  = rb_define_class_under(mXml, "ElementContent", rb_cObject);

  cNokogiriXmlElementContent = cContent;

  /* public readers */
  rb_define_method(cContent, "name", get_name, 0);
  rb_define_method(cContent, "type", get_type, 0);
  rb_define_method(cContent, "occur", get_occur, 0);
  rb_define_method(cContent, "prefix", get_prefix, 0);

  /* child accessors are private */
  rb_define_private_method(cContent, "c1", get_c1, 0);
  rb_define_private_method(cContent, "c2", get_c2, 0);
}
nokogiri-1.6.1/ext/nokogiri/xml_schema.h 0000644 0001750 0001750 00000000221 12261213762 017625 0 ustar boutil boutil #ifndef NOKOGIRI_XML_SCHEMA
#define NOKOGIRI_XML_SCHEMA
#include
void init_xml_schema();
extern VALUE cNokogiriXmlSchema;
#endif
nokogiri-1.6.1/ext/nokogiri/xml_document_fragment.h 0000644 0001750 0001750 00000000275 12261213762 022077 0 ustar boutil boutil #ifndef NOKOGIRI_XML_DOCUMENT_FRAGMENT
#define NOKOGIRI_XML_DOCUMENT_FRAGMENT
#include
void init_xml_document_fragment();
extern VALUE cNokogiriXmlDocumentFragment;
#endif
nokogiri-1.6.1/ext/nokogiri/depend 0000644 0001750 0001750 00000050316 12261213762 016530 0 ustar boutil boutil html_document.o: html_document.c html_document.h nokogiri.h xml_io.h \
xml_document.h html_entity_lookup.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
html_element_description.o: html_element_description.c \
html_element_description.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h xml_namespace.h \
xml_encoding_handler.h
html_entity_lookup.o: html_entity_lookup.c html_entity_lookup.h \
nokogiri.h xml_io.h xml_document.h html_document.h xml_node.h \
xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
html_sax_parser_context.o: html_sax_parser_context.c \
html_sax_parser_context.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h xslt_stylesheet.h xml_syntax_error.h xml_schema.h \
xml_relax_ng.h html_element_description.h xml_namespace.h \
xml_encoding_handler.h
nokogiri.o: nokogiri.c nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_attr.o: xml_attr.c xml_attr.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_attribute_decl.o: xml_attribute_decl.c xml_attribute_decl.h \
nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \
html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_element_decl.h xml_entity_decl.h xml_xpath_context.h \
xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \
xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \
xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_cdata.o: xml_cdata.c xml_cdata.h nokogiri.h xml_io.h \
xml_document.h html_entity_lookup.h html_document.h xml_node.h \
xml_text.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_comment.o: xml_comment.c xml_comment.h nokogiri.h xml_io.h \
xml_document.h html_entity_lookup.h html_document.h xml_node.h \
xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_node_set.h \
xml_dtd.h xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_document.o: xml_document.c xml_document.h nokogiri.h xml_io.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_document_fragment.o: xml_document_fragment.c \
xml_document_fragment.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_dtd.o: xml_dtd.c xml_dtd.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_element_content.o: xml_element_content.c xml_element_content.h \
nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \
html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_sax_parser_context.h xml_sax_parser.h \
xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \
xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_element_decl.o: xml_element_decl.c xml_element_decl.h nokogiri.h \
xml_io.h xml_document.h html_entity_lookup.h html_document.h \
xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_entity_decl.h xml_xpath_context.h \
xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \
xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \
xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_encoding_handler.o: xml_encoding_handler.c xml_encoding_handler.h \
nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \
html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h
xml_entity_decl.o: xml_entity_decl.c xml_entity_decl.h nokogiri.h \
xml_io.h xml_document.h html_entity_lookup.h html_document.h \
xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_xpath_context.h \
xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \
xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \
xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_entity_reference.o: xml_entity_reference.c xml_entity_reference.h \
nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \
html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_io.o: xml_io.c xml_io.h nokogiri.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_namespace.o: xml_namespace.c xml_namespace.h nokogiri.h xml_io.h \
xml_document.h html_entity_lookup.h html_document.h xml_node.h \
xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_encoding_handler.h
xml_node.o: xml_node.c xml_node.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_text.h xml_cdata.h \
xml_attr.h xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_node_set.o: xml_node_set.c xml_node_set.h nokogiri.h xml_io.h \
xml_document.h html_entity_lookup.h html_document.h xml_node.h \
xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_dtd.h xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_processing_instruction.o: xml_processing_instruction.c \
xml_processing_instruction.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_entity_reference.h xml_document_fragment.h \
xml_comment.h xml_node_set.h xml_dtd.h xml_attribute_decl.h \
xml_element_decl.h xml_entity_decl.h xml_xpath_context.h \
xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \
xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \
xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_reader.o: xml_reader.c xml_reader.h nokogiri.h xml_io.h \
xml_document.h html_entity_lookup.h html_document.h xml_node.h \
xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_relax_ng.o: xml_relax_ng.c xml_relax_ng.h nokogiri.h xml_io.h \
xml_document.h html_entity_lookup.h html_document.h xml_node.h \
xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_schema.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_sax_parser.o: xml_sax_parser.c xml_sax_parser.h nokogiri.h \
xml_io.h xml_document.h html_entity_lookup.h html_document.h \
xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \
xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_sax_parser_context.o: xml_sax_parser_context.c \
xml_sax_parser_context.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_text.h \
xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_sax_push_parser.o: xml_sax_push_parser.c xml_sax_push_parser.h \
nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \
html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_reader.h html_sax_parser_context.h \
xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xml_schema.o: xml_schema.c xml_schema.h nokogiri.h xml_io.h \
xml_document.h html_entity_lookup.h html_document.h xml_node.h \
xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \
xml_entity_reference.h xml_document_fragment.h xml_comment.h \
xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \
xml_entity_decl.h xml_xpath_context.h xml_element_content.h \
xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \
xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \
xml_syntax_error.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_syntax_error.o: xml_syntax_error.c xml_syntax_error.h nokogiri.h \
xml_io.h xml_document.h html_entity_lookup.h html_document.h \
xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_schema.h \
xml_relax_ng.h html_element_description.h xml_namespace.h \
xml_encoding_handler.h
xml_text.o: xml_text.c xml_text.h nokogiri.h xml_io.h xml_document.h \
html_entity_lookup.h html_document.h xml_node.h xml_cdata.h \
xml_attr.h xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \
xml_schema.h xml_relax_ng.h html_element_description.h \
xml_namespace.h xml_encoding_handler.h
xml_xpath_context.o: xml_xpath_context.c xml_xpath_context.h \
nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \
html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \
xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \
xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \
html_element_description.h xml_namespace.h xml_encoding_handler.h
xslt_stylesheet.o: xslt_stylesheet.c xslt_stylesheet.h nokogiri.h \
xml_io.h xml_document.h html_entity_lookup.h html_document.h \
xml_node.h xml_text.h xml_cdata.h xml_attr.h \
xml_processing_instruction.h xml_entity_reference.h \
xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \
xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \
xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \
xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \
html_sax_parser_context.h xml_syntax_error.h xml_schema.h \
xml_relax_ng.h html_element_description.h xml_namespace.h \
xml_encoding_handler.h
nokogiri-1.6.1/ext/nokogiri/xml_syntax_error.c 0000644 0001750 0001750 00000003126 12261213762 021126 0 ustar boutil boutil #include
/*
 * Error callback that collects errors: +ctx+ is treated as a Ruby Array
 * VALUE and the wrapped +error+ is appended to it.
 */
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error)
{
  VALUE errors  = (VALUE)ctx;
  VALUE wrapped = Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error);

  rb_ary_push(errors, wrapped);
}
/*
 * Error callback that raises: wraps +error+ in the default syntax error
 * class and raises it as a Ruby exception. +ctx+ is not used.
 */
void Nokogiri_error_raise(void * ctx, xmlErrorPtr error)
{
  VALUE exception = Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error);

  rb_exc_raise(exception);
}
/*
 * Build a Ruby exception object describing +error+.
 *
 * klass - exception class to instantiate; a zero value selects
 *         cNokogiriXmlSyntaxError.
 * error - libxml2 error, may be NULL. When NULL the exception is created
 *         with a nil message and no instance variables are set.
 *
 * Returns the new exception instance (it is not raised here).
 */
VALUE Nokogiri_wrap_xml_syntax_error(VALUE klass, xmlErrorPtr error)
{
  VALUE message = Qnil;
  VALUE exception;

  if (!klass) {
    klass = cNokogiriXmlSyntaxError;
  }

  if (error && error->message) {
    message = NOKOGIRI_STR_NEW2(error->message);
  }

  exception = rb_class_new_instance(1, &message, klass);

  /* Nothing more to copy without a libxml2 error. */
  if (!error) {
    return exception;
  }

  rb_iv_set(exception, "@domain", INT2NUM(error->domain));
  rb_iv_set(exception, "@code",   INT2NUM(error->code));
  rb_iv_set(exception, "@level",  INT2NUM((short)error->level));
  rb_iv_set(exception, "@file",   RBSTR_OR_QNIL(error->file));
  rb_iv_set(exception, "@line",   INT2NUM(error->line));
  rb_iv_set(exception, "@str1",   RBSTR_OR_QNIL(error->str1));
  rb_iv_set(exception, "@str2",   RBSTR_OR_QNIL(error->str2));
  rb_iv_set(exception, "@str3",   RBSTR_OR_QNIL(error->str3));
  rb_iv_set(exception, "@int1",   INT2NUM(error->int1));
  /* libxml2's int2 field is exposed to Ruby as @column. */
  rb_iv_set(exception, "@column", INT2NUM(error->int2));

  return exception;
}
VALUE cNokogiriXmlSyntaxError;
/*
 * Define Nokogiri::SyntaxError (a StandardError) and its subclass
 * Nokogiri::XML::SyntaxError, and remember the latter in
 * cNokogiriXmlSyntaxError.
 */
void init_xml_syntax_error()
{
  VALUE mod_nokogiri = rb_define_module("Nokogiri");
  VALUE mod_xml      = rb_define_module_under(mod_nokogiri, "XML");

  /*
   * The XML::SyntaxError is raised on parse errors
   */
  VALUE base_error =
    rb_define_class_under(mod_nokogiri, "SyntaxError", rb_eStandardError);

  cNokogiriXmlSyntaxError =
    rb_define_class_under(mod_xml, "SyntaxError", base_error);
}
nokogiri-1.6.1/ext/nokogiri/xml_text.c 0000644 0001750 0001750 00000002166 12261213762 017356 0 ustar boutil boutil #include
/*
 * call-seq:
 *  new(content, document)
 *
 * Create a new Text node holding +content+ that belongs to +document+.
 * Extra arguments are accepted and passed on to +initialize+; when a block
 * is given the new node is yielded to it.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE content, document, extra, wrapped;
  xmlDocPtr  xml_doc;
  xmlNodePtr text_node;

  rb_scan_args(argc, argv, "2*", &content, &document, &extra);
  Data_Get_Struct(document, xmlDoc, xml_doc);

  text_node      = xmlNewText((xmlChar *)StringValuePtr(content));
  text_node->doc = xml_doc->doc;

  /* Register the node with its document before handing it to Ruby. */
  nokogiri_root_node(text_node);

  wrapped = Nokogiri_wrap_xml_node(klass, text_node);
  rb_obj_call_init(wrapped, argc, argv);

  if (rb_block_given_p()) {
    rb_yield(wrapped);
  }

  return wrapped;
}
VALUE cNokogiriXmlText ;
/*
 * Define Nokogiri::XML::Text (Node > CharacterData > Text) and bind its
 * native +new+ singleton method.
 */
void init_xml_text()
{
  VALUE mod_nokogiri = rb_define_module("Nokogiri");
  VALUE mod_xml      = rb_define_module_under(mod_nokogiri, "XML");
  /* */
  VALUE node_class      = rb_define_class_under(mod_xml, "Node", rb_cObject);
  VALUE char_data_class = rb_define_class_under(mod_xml, "CharacterData", node_class);

  /*
   * Wraps Text nodes.
   */
  cNokogiriXmlText = rb_define_class_under(mod_xml, "Text", char_data_class);

  rb_define_singleton_method(cNokogiriXmlText, "new", new, -1);
}
nokogiri-1.6.1/ext/nokogiri/nokogiri.c 0000644 0001750 0001750 00000007456 12261213762 017342 0 ustar boutil boutil #include
VALUE mNokogiri ;
VALUE mNokogiriXml ;
VALUE mNokogiriHtml ;
VALUE mNokogiriXslt ;
VALUE mNokogiriXmlSax ;
VALUE mNokogiriHtmlSax ;
#ifdef USE_INCLUDED_VASPRINTF
/*
 * Fallback vasprintf for platforms that lack one (Windows).
 * Thank you Geoffroy Couprie for the original implementation of vasprintf!
 *
 * Formats +fmt+ with the arguments in +ap+ into a freshly malloc'ed,
 * NUL-terminated buffer and stores it in *strp.
 *
 * Returns the number of characters written (excluding the terminating NUL),
 * or -1 on formatting or allocation failure, in which case *strp is left
 * untouched and nothing is leaked.
 */
#ifndef va_copy
#  ifdef __va_copy
#    define va_copy(dst, src) __va_copy(dst, src)
#  else
/* Pre-C99 toolchains without va_copy: fall back to plain assignment. */
#    define va_copy(dst, src) ((dst) = (src))
#  endif
#endif
int vasprintf (char **strp, const char *fmt, va_list ap)
{
  va_list measure_ap;
  char *res;
  int len;

  /*
   * A va_list may only be traversed once, so the sizing pass must run on a
   * copy; reusing +ap+ for both vsnprintf calls is undefined behaviour.
   */
  va_copy(measure_ap, ap);
  len = vsnprintf(NULL, 0, fmt, measure_ap);
  va_end(measure_ap);

  if (len < 0)
    return -1;

  res = (char *)malloc((size_t)len + 1);
  if (res == NULL)
    return -1;

  len = vsnprintf(res, (size_t)len + 1, fmt, ap);
  if (len < 0) {
    free(res);
    return -1;
  }

  *strp = res;
  return len;
}
#endif
/*
 * Release a buffer obtained from vasprintf.
 *
 * When USING_SYSTEM_ALLOCATOR_LIBRARY is defined (Ruby Enterprise Edition
 * with tcmalloc) the buffer is released with system_free, otherwise with
 * plain free.
 */
void vasprintf_free (void *p)
{
#ifdef USING_SYSTEM_ALLOCATOR_LIBRARY /* Ruby Enterprise Edition with tcmalloc */
  system_free(p);
#else
  free(p);
#endif
}
#ifdef HAVE_RUBY_UTIL_H
#include "ruby/util.h"
#else
#ifndef __MACRUBY__
#include "util.h"
#endif
#endif
/*
 * Record +node+ in the unlinkedNodes table of the tuple stored in its
 * document's _private field. When node->doc is itself a document fragment,
 * the fragment's own doc pointer is followed to reach the document.
 */
void nokogiri_root_node(xmlNodePtr node)
{
  xmlDocPtr owner = node->doc;
  nokogiriTuplePtr tuple;

  if (owner->type == XML_DOCUMENT_FRAG_NODE) {
    owner = owner->doc;
  }

  tuple = (nokogiriTuplePtr)owner->_private;
  st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
}
/*
 * Record the namespace definition +ns+ in the unlinkedNodes table of the
 * tuple stored in +doc+'s _private field. When +doc+ is a document fragment,
 * its doc pointer is followed first.
 */
void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
{
  xmlDocPtr owner =
    (doc->type == XML_DOCUMENT_FRAG_NODE) ? doc->doc : doc;
  nokogiriTuplePtr tuple = (nokogiriTuplePtr)owner->_private;

  st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
}
/*
 * Extension entry point: configures libxml2, defines the Nokogiri module
 * hierarchy and its build-information constants, then runs every per-class
 * init_* function.
 */
void Init_nokogiri()
{
#ifndef __MACRUBY__
/* Have libxml2 allocate, reallocate, free and strdup through Ruby's */
/* allocator functions (skipped when building for MacRuby). */
xmlMemSetup(
(xmlFreeFunc)ruby_xfree,
(xmlMallocFunc)ruby_xmalloc,
(xmlReallocFunc)ruby_xrealloc,
ruby_strdup
);
#endif
/* Module hierarchy: Nokogiri, Nokogiri::{XML,HTML,XSLT}, and the */
/* XML::SAX / HTML::SAX namespaces. */
mNokogiri = rb_define_module("Nokogiri");
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
/* LIBXML_VERSION comes from the compile-time LIBXML_DOTTED_VERSION macro. */
rb_const_set( mNokogiri,
rb_intern("LIBXML_VERSION"),
NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
);
/* LIBXML_PARSER_VERSION comes from libxml2's xmlParserVersion variable. */
rb_const_set( mNokogiri,
rb_intern("LIBXML_PARSER_VERSION"),
NOKOGIRI_STR_NEW2(xmlParserVersion)
);
/* Report whether packaged libraries are in use and, if so, their paths; */
/* otherwise the path constants are nil. */
#ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue);
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH));
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH));
#else
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse);
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil);
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil);
#endif
/* Expose whether LIBXML_ICONV_ENABLED was defined at compile time. */
#ifdef LIBXML_ICONV_ENABLED
rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qtrue);
#else
rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
#endif
/* Initialise the libxml2 parser before any wrapper class is defined. */
xmlInitParser();
/* Per-class initialisers. NOTE(review): the order is kept exactly as */
/* written; whether any initialiser depends on an earlier one is not */
/* visible from this function. */
init_xml_document();
init_html_document();
init_xml_node();
init_xml_document_fragment();
init_xml_text();
init_xml_cdata();
init_xml_processing_instruction();
init_xml_attr();
init_xml_entity_reference();
init_xml_comment();
init_xml_node_set();
init_xml_xpath_context();
init_xml_sax_parser_context();
init_xml_sax_parser();
init_xml_sax_push_parser();
init_xml_reader();
init_xml_dtd();
init_xml_element_content();
init_xml_attribute_decl();
init_xml_element_decl();
init_xml_entity_decl();
init_xml_namespace();
init_html_sax_parser_context();
init_html_sax_push_parser();
init_xslt_stylesheet();
init_xml_syntax_error();
init_html_entity_lookup();
init_html_element_description();
init_xml_schema();
init_xml_relax_ng();
init_nokogiri_io();
init_xml_encoding_handler();
}
nokogiri-1.6.1/ext/nokogiri/xml_attr.c 0000644 0001750 0001750 00000003774 12261213762 017352 0 ustar boutil boutil #include
/*
 * call-seq:
 *  value=(content)
 *
 * Set the value for this Attr to +content+. A +nil+ or +false+ +content+
 * removes the current value and leaves the attribute empty.
 *
 * Raises TypeError when +content+ cannot be converted to a String; in that
 * case the existing value is left intact.
 *
 * Returns +content+.
 */
static VALUE set_value(VALUE self, VALUE content)
{
  xmlAttrPtr attr;
  xmlChar *encoded = NULL;
  xmlNode *child;

  Data_Get_Struct(self, xmlAttr, attr);

  /*
   * Convert and entity-encode the new content before touching the attribute.
   * StringValuePtr raises for non-string input, and raising after the old
   * children were freed would leave the attribute emptied.
   *
   * RTEST is required here: Qnil is not a zero VALUE, so a plain C truth
   * test would send nil into StringValuePtr.
   */
  if (RTEST(content)) {
    encoded = xmlEncodeEntitiesReentrant(
                attr->doc,
                (unsigned char *)StringValuePtr(content)
              );
  }

  /* Drop the previous value. */
  if (attr->children) xmlFreeNodeList(attr->children);
  attr->children = attr->last = NULL;

  if (encoded) {
    attr->children = xmlStringGetNodeList(attr->doc, encoded);

    /* Adopt every new child and remember the last one. */
    for (child = attr->children; child; child = child->next) {
      child->parent = (xmlNode *)attr;
      child->doc    = attr->doc;
      if (child->next == NULL) attr->last = child;
    }

    /* The node list holds its own copy of the text. */
    xmlFree(encoded);
  }

  return content;
}
/*
 * call-seq:
 *  new(document, name)
 *
 * Create a new Attr named +name+ that belongs to +document+. Extra
 * arguments are accepted and passed on to +initialize+; when a block is
 * given the new attribute is yielded to it.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE document, name, extra, wrapped;
  xmlDocPtr  doc;
  xmlAttrPtr attribute;

  rb_scan_args(argc, argv, "2*", &document, &name, &extra);
  Data_Get_Struct(document, xmlDoc, doc);

  /* Created with a NULL value; the content is set separately. */
  attribute = xmlNewDocProp(doc, (const xmlChar *)StringValuePtr(name), NULL);

  /* Register the node with its document before handing it to Ruby. */
  nokogiri_root_node((xmlNodePtr)attribute);

  wrapped = Nokogiri_wrap_xml_node(klass, (xmlNodePtr)attribute);
  rb_obj_call_init(wrapped, argc, argv);

  if (rb_block_given_p()) {
    rb_yield(wrapped);
  }

  return wrapped;
}
VALUE cNokogiriXmlAttr;
/*
 * Define Nokogiri::XML::Attr (a subclass of Node) and bind its native
 * +new+ singleton method and +value=+ instance method.
 */
void init_xml_attr()
{
  VALUE mod_nokogiri = rb_define_module("Nokogiri");
  VALUE mod_xml      = rb_define_module_under(mod_nokogiri, "XML");
  VALUE node_class   = rb_define_class_under(mod_xml, "Node", rb_cObject);

  /*
   * Attr represents a Attr node in an xml document.
   */
  cNokogiriXmlAttr = rb_define_class_under(mod_xml, "Attr", node_class);

  rb_define_singleton_method(cNokogiriXmlAttr, "new", new, -1);
  rb_define_method(cNokogiriXmlAttr, "value=", set_value, 1);
}
nokogiri-1.6.1/ext/nokogiri/xml_sax_push_parser.c 0000644 0001750 0001750 00000005277 12261213762 021606 0 ustar boutil boutil #include
/*
 * Free function for PushParser objects (passed to Data_Wrap_Struct by
 * allocate). A NULL +ctx+ is tolerated: allocate wraps a NULL pointer and a
 * real parser context is only installed later by initialize_native.
 */
static void deallocate(xmlParserCtxtPtr ctx)
{
NOKOGIRI_DEBUG_START(ctx);
if(ctx != NULL) {
/* Hand the tuple stored in userData (set in initialize_native) to */
/* NOKOGIRI_SAX_TUPLE_DESTROY before freeing the context that holds it. */
NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
xmlFreeParserCtxt(ctx);
}
NOKOGIRI_DEBUG_END(ctx);
}
/*
 * Allocator for PushParser: wraps a NULL parser context with no mark
 * function and deallocate as the free function. The real context is
 * installed afterwards by initialize_native.
 */
static VALUE allocate(VALUE klass)
{
  VALUE instance = Data_Wrap_Struct(klass, NULL, deallocate, NULL);

  return instance;
}
/*
 * call-seq:
 *  native_write(chunk, last_chunk)
 *
 * Feed +chunk+ to the PushParser; a +nil+ chunk feeds no data. Passing
 * +true+ as +last_chunk+ tells the parser this is the final chunk, which
 * triggers the end_document handler.
 *
 * When parsing reports an error and the context was not configured with
 * XML_PARSE_RECOVER, the last parser error is raised as a Ruby exception.
 */
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
  xmlParserCtxtPtr ctx;
  const char *data = NULL;
  int length = 0;
  int terminate = (Qtrue == _last_chunk) ? 1 : 0;
  int status;

  Data_Get_Struct(self, xmlParserCtxt, ctx);

  if (_chunk != Qnil) {
    data   = StringValuePtr(_chunk);
    length = (int)RSTRING_LEN(_chunk);
  }

  status = xmlParseChunk(ctx, data, length, terminate);

  /* Errors are only fatal when recovery mode is off. */
  if (status && !(ctx->options & XML_PARSE_RECOVER)) {
    xmlErrorPtr last_error = xmlCtxtGetLastError(ctx);
    Nokogiri_error_raise(NULL, last_error);
  }

  return self;
}
/*
 * call-seq:
 *  initialize_native(xml_sax, filename)
 *
 * Create the native push parser context for +xml_sax+ and attach it to this
 * object. +filename+ may be +nil+.
 *
 * Raises RuntimeError when libxml2 cannot create the parser context.
 */
static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
{
  xmlSAXHandlerPtr handler;
  xmlParserCtxtPtr ctx;
  const char *path = NULL;

  Data_Get_Struct(_xml_sax, xmlSAXHandler, handler);

  if (_filename != Qnil) {
    path = StringValuePtr(_filename);
  }

  /* No user data and no initial chunk; input arrives through native_write. */
  ctx = xmlCreatePushParserCtxt(handler, NULL, NULL, 0, path);
  if (!ctx) {
    rb_raise(rb_eRuntimeError, "Could not create a parser context");
  }

  ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
  ctx->sax2     = 1;

  /* Replace the NULL pointer wrapped by allocate. */
  DATA_PTR(self) = ctx;

  return self;
}
/*
 * Returns the parser context's current +options+ bit field as an Integer.
 */
static VALUE get_options(VALUE self)
{
  xmlParserCtxtPtr ctx;
  int flags;

  Data_Get_Struct(self, xmlParserCtxt, ctx);
  flags = ctx->options;

  return INT2NUM(flags);
}
/*
 * Apply the Integer +options+ to the parser context through
 * xmlCtxtUseOptions.
 *
 * Raises RuntimeError when xmlCtxtUseOptions returns a non-zero result.
 * Returns nil.
 */
static VALUE set_options(VALUE self, VALUE options)
{
  xmlParserCtxtPtr ctx;
  int flags;

  Data_Get_Struct(self, xmlParserCtxt, ctx);
  flags = (int)NUM2INT(options);

  if (xmlCtxtUseOptions(ctx, flags) != 0) {
    rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
  }

  return Qnil;
}
VALUE cNokogiriXmlSaxPushParser ;
/*
 * Define Nokogiri::XML::SAX::PushParser, install its allocator and bind its
 * native methods. +initialize_native+ and +native_write+ are private;
 * +options+ and +options=+ are public.
 */
void init_xml_sax_push_parser()
{
  VALUE mod_nokogiri = rb_define_module("Nokogiri");
  VALUE mod_xml      = rb_define_module_under(mod_nokogiri, "XML");
  VALUE mod_sax      = rb_define_module_under(mod_xml, "SAX");

  cNokogiriXmlSaxPushParser =
    rb_define_class_under(mod_sax, "PushParser", rb_cObject);

  rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);

  rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
  rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);

  rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
  rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
}
nokogiri-1.6.1/ext/nokogiri/xml_reader.h 0000644 0001750 0001750 00000000222 12261213762 017630 0 ustar boutil boutil #ifndef NOKOGIRI_XML_READER
#define NOKOGIRI_XML_READER
#include
void init_xml_reader();
extern VALUE cNokogiriXmlReader;
#endif
nokogiri-1.6.1/ext/nokogiri/html_sax_parser_context.h 0000644 0001750 0001750 00000000305 12261213762 022447 0 ustar boutil boutil #ifndef NOKOGIRI_HTML_SAX_PARSER_CONTEXT
#define NOKOGIRI_HTML_SAX_PARSER_CONTEXT
#include
extern VALUE cNokogiriHtmlSaxParserContext;
void init_html_sax_parser_context();
#endif
nokogiri-1.6.1/ext/nokogiri/xml_element_decl.c 0000644 0001750 0001750 00000002535 12261213762 021012 0 ustar boutil boutil #include
static ID id_document;
/*
 * call-seq:
 *  element_type
 *
 * Returns the element type (+etype+) of this ElementDecl as an Integer.
 */
static VALUE element_type(VALUE self)
{
  xmlElementPtr decl;
  long type_code;

  Data_Get_Struct(self, xmlElement, decl);
  type_code = (long)decl->etype;

  return INT2NUM(type_code);
}
/*
 * call-seq:
 *  content
 *
 * Returns the allowed content for this ElementDecl wrapped as an
 * ElementContent, or +nil+ when the declaration has no content.
 */
static VALUE content(VALUE self)
{
  xmlElementPtr decl;
  VALUE document;

  Data_Get_Struct(self, xmlElement, decl);

  if (decl->content == NULL) {
    return Qnil;
  }

  /* The wrapper is tied to the document returned by this node's #document. */
  document = rb_funcall(self, id_document, 0);

  return Nokogiri_wrap_element_content(document, decl->content);
}
/*
 * call-seq:
 *  prefix
 *
 * The namespace prefix for this ElementDecl, or nil when it has none.
 */
static VALUE prefix(VALUE self)
{
  xmlElementPtr decl;
  VALUE rb_prefix = Qnil;

  Data_Get_Struct(self, xmlElement, decl);

  if (decl->prefix) {
    rb_prefix = NOKOGIRI_STR_NEW2(decl->prefix);
  }

  return rb_prefix;
}
/* Ruby class object for Nokogiri::XML::ElementDecl; assigned in init_xml_element_decl(). */
VALUE cNokogiriXmlElementDecl;
/*
 * Define Nokogiri::XML::ElementDecl as a subclass of Nokogiri::XML::Node,
 * register its instance methods and cache the ID of the "document" method.
 */
void init_xml_element_decl()
{
VALUE nokogiri = rb_define_module("Nokogiri");
VALUE xml = rb_define_module_under(nokogiri, "XML");
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
VALUE klass = rb_define_class_under(xml, "ElementDecl", node);
cNokogiriXmlElementDecl = klass;
rb_define_method(klass, "element_type", element_type, 0);
rb_define_method(klass, "content", content, 0);
rb_define_method(klass, "prefix", prefix, 0);
/* content() uses this ID to call #document on the receiver. */
id_document = rb_intern("document");
}
nokogiri-1.6.1/ext/nokogiri/xml_xpath_context.c 0000644 0001750 0001750 00000020440 12261213762 021255 0 ustar boutil boutil #include
int vasprintf (char **strp, const char *fmt, va_list ap);
/*
 * Free function handed to Data_Wrap_Struct in new(): releases the libxml2
 * XPath context with xmlXPathFreeContext.
 *
 * NOTE(review): the NOKOGIRI_DEBUG_START/END macros are defined outside this
 * file; presumably they emit debug tracing around the free -- confirm.
 */
static void deallocate(xmlXPathContextPtr ctx)
{
NOKOGIRI_DEBUG_START(ctx);
xmlXPathFreeContext(ctx);
NOKOGIRI_DEBUG_END(ctx);
}
/*
 * call-seq:
 *  register_ns(prefix, uri)
 *
 * Register the namespace +uri+ under +prefix+ on this XPath context.
 * Returns self.
 */
static VALUE register_ns(VALUE self, VALUE prefix, VALUE uri)
{
  xmlXPathContextPtr xpath_ctx;

  Data_Get_Struct(self, xmlXPathContext, xpath_ctx);

  xmlXPathRegisterNs(xpath_ctx,
                     (const xmlChar *)StringValuePtr(prefix),
                     (const xmlChar *)StringValuePtr(uri));

  return self;
}
/*
 * call-seq:
 *  register_variable(name, value)
 *
 * Register the XPath variable +name+ with the string +value+ on this
 * context. Returns self.
 */
static VALUE register_variable(VALUE self, VALUE name, VALUE value)
{
  xmlXPathContextPtr xpath_ctx;
  xmlXPathObjectPtr variable;

  Data_Get_Struct(self, xmlXPathContext, xpath_ctx);

  /* The value is always registered as an XPath string object. */
  variable = xmlXPathNewCString(StringValuePtr(value));

  xmlXPathRegisterVariable(xpath_ctx,
                           (const xmlChar *)StringValuePtr(name),
                           variable);

  return self;
}
/*
 * Bridge a custom XPath function call from libxml2 into Ruby.
 *
 * Pops +nargs+ arguments off the evaluation stack of +ctx+, converts each to
 * a Ruby object, calls +function_name+ on +handler+ with them and pushes the
 * converted return value back onto the XPath stack.
 *
 * Argument conversion:
 *   XPATH_STRING  -> String       XPATH_BOOLEAN -> true/false
 *   XPATH_NUMBER  -> Float        XPATH_NODESET -> wrapped node set
 *   anything else -> String (via xmlXPathCastToString)
 *
 * Return value conversion:
 *   Float/Bignum/Fixnum -> number, String -> string, true/false -> boolean,
 *   nil -> nothing is pushed, Array or NodeSet -> node set (copied).
 *   Any other return type raises RuntimeError.
 */
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char* function_name)
{
  int i;
  VALUE result, doc;
  VALUE *argv;
  VALUE node_set = Qnil;
  xmlNodeSetPtr xml_node_set = NULL;
  xmlXPathObjectPtr obj;
  xmlChar *cast_string;
  nokogiriNodeSetTuple *node_set_tuple;

  assert(ctx->context->doc);
  assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));

  argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE));
  if (nargs > 0 && argv == NULL) {
    /* Without this check the loops below would write through NULL. */
    rb_raise(rb_eNoMemError, "Could not allocate XPath function arguments");
  }

  /* argv lives on the C heap, so each slot must be registered with the GC. */
  for (i = 0 ; i < nargs ; ++i) {
    rb_gc_register_address(&argv[i]);
  }

  doc = DOC_RUBY_OBJECT(ctx->context->doc);

  if (nargs > 0) {
    /* Arguments come off the stack last-first, so fill argv backwards. */
    i = nargs - 1;
    do {
      obj = valuePop(ctx);
      switch(obj->type) {
        case XPATH_STRING:
          argv[i] = NOKOGIRI_STR_NEW2(obj->stringval);
          /* xmlXPathFreeNodeSetList() below frees only the wrapper object,
           * so the string payload has to be released here (as evaluate()
           * does for its string results). */
          xmlFree(obj->stringval);
          break;
        case XPATH_BOOLEAN:
          argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
          break;
        case XPATH_NUMBER:
          argv[i] = rb_float_new(obj->floatval);
          break;
        case XPATH_NODESET:
          argv[i] = Nokogiri_wrap_xml_node_set(obj->nodesetval, doc);
          break;
        default:
          /* xmlXPathCastToString() returns a newly allocated string that
           * the caller owns; copy it into Ruby and free the C buffer. */
          cast_string = xmlXPathCastToString(obj);
          argv[i] = NOKOGIRI_STR_NEW2(cast_string);
          xmlFree(cast_string);
      }
      xmlXPathFreeNodeSetList(obj);
    } while(i-- > 0);
  }

  result = rb_funcall2(handler, rb_intern((const char*)function_name), nargs, argv);

  for (i = 0 ; i < nargs ; ++i) {
    rb_gc_unregister_address(&argv[i]);
  }
  free(argv);

  switch(TYPE(result)) {
    case T_FLOAT:
    case T_BIGNUM:
    case T_FIXNUM:
      xmlXPathReturnNumber(ctx, NUM2DBL(result));
      break;
    case T_STRING:
      xmlXPathReturnString(
        ctx,
        xmlCharStrdup(StringValuePtr(result))
      );
      break;
    case T_TRUE:
      xmlXPathReturnTrue(ctx);
      break;
    case T_FALSE:
      xmlXPathReturnFalse(ctx);
      break;
    case T_NIL:
      break;
    case T_ARRAY:
      {
        /* Build a NodeSet from the Array, then return a copy of its nodes. */
        VALUE args[2];
        args[0] = doc;
        args[1] = result;
        node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
        Data_Get_Struct(node_set, nokogiriNodeSetTuple, node_set_tuple);
        xml_node_set = node_set_tuple->node_set;
        xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
      }
      break;
    case T_DATA:
      if(rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
        Data_Get_Struct(result, nokogiriNodeSetTuple, node_set_tuple);
        xml_node_set = node_set_tuple->node_set;
        /* Copy the node set, otherwise it will get GC'd. */
        xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
        break;
      }
      /* Any other wrapped object deliberately falls through to the error. */
    default:
      rb_raise(rb_eRuntimeError, "Invalid return type");
  }
}
/*
 * XPath function callback returned by lookup(). Reads the Ruby handler
 * object from ctx->context->userData and the name of the function being
 * called from ctx->context->function, then delegates the argument/return
 * marshalling to Nokogiri_marshal_xpath_funcall_and_return_values().
 */
static void ruby_funcall(xmlXPathParserContextPtr ctx, int nargs)
{
VALUE handler = Qnil;
const char *function = NULL ;
assert(ctx);
assert(ctx->context);
assert(ctx->context->userData);
assert(ctx->context->function);
/* userData is set to the handler VALUE by evaluate(). */
handler = (VALUE)(ctx->context->userData);
function = (const char*)(ctx->context->function);
Nokogiri_marshal_xpath_funcall_and_return_values(ctx, nargs, handler, function);
}
static xmlXPathFunction lookup( void *ctx,
const xmlChar * name,
const xmlChar* ns_uri )
{
VALUE xpath_handler = (VALUE)ctx;
if(rb_respond_to(xpath_handler, rb_intern((const char *)name)))
return ruby_funcall;
return NULL;
}
/*
 * Structured error callback installed by evaluate() while an expression is
 * being evaluated. Wraps +error+ in Nokogiri::XML::XPath::SyntaxError and
 * raises it; the function never returns. +ctx+ is unused.
 */
NORETURN(static void xpath_exception_handler(void * ctx, xmlErrorPtr error));
static void xpath_exception_handler(void * ctx, xmlErrorPtr error)
{
/* Resolve the exception class by name at raise time. */
VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
VALUE klass = rb_const_get(xpath, rb_intern("SyntaxError"));
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(klass, error));
}
NORETURN(static void xpath_generic_exception_handler(void * ctx, const char *msg, ...));
static void xpath_generic_exception_handler(void * ctx, const char *msg, ...)
{
char * message;
va_list args;
va_start(args, msg);
vasprintf(&message, msg, args);
va_end(args);
rb_raise(rb_eRuntimeError, "%s", message);
}
/*
 * call-seq:
 *  evaluate(search_path, handler = nil)
 *
 * Evaluate the XPath expression +search_path+ against this context.
 *
 * When +handler+ is given it is stored as the context's user data and
 * consulted (through lookup()) for custom XPath functions.
 *
 * The return value depends on the type of the XPath result: a String, a
 * NodeSet, a Float or true/false. Any other result type yields an empty
 * NodeSet. Raises Nokogiri::XML::XPath::SyntaxError when evaluation fails.
 */
static VALUE evaluate(int argc, VALUE *argv, VALUE self)
{
VALUE search_path, xpath_handler;
VALUE thing = Qnil;
xmlXPathContextPtr ctx;
xmlXPathObjectPtr xpath;
xmlChar *query;
Data_Get_Struct(self, xmlXPathContext, ctx);
/* One required argument, one optional; default the handler to nil. */
if(rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1)
xpath_handler = Qnil;
query = (xmlChar *)StringValuePtr(search_path);
if(Qnil != xpath_handler) {
/* FIXME: not sure if this is the correct place to shove private data. */
ctx->userData = (void *)xpath_handler;
xmlXPathRegisterFuncLookup(ctx, lookup, (void *)xpath_handler);
}
xmlResetLastError();
/* Both handlers raise Ruby exceptions, so control may leave this function
 * from inside xmlXPathEvalExpression; in that case the two resets below
 * are not executed. */
xmlSetStructuredErrorFunc(NULL, xpath_exception_handler);
/* For some reason, xmlXPathEvalExpression will blow up with a generic error */
/* when there is a non existent function. */
xmlSetGenericErrorFunc(NULL, xpath_generic_exception_handler);
xpath = xmlXPathEvalExpression(query, ctx);
xmlSetStructuredErrorFunc(NULL, NULL);
xmlSetGenericErrorFunc(NULL, NULL);
if(xpath == NULL) {
/* Note: this VALUE named xpath shadows the xmlXPathObjectPtr above. */
VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
VALUE klass = rb_const_get(xpath, rb_intern("SyntaxError"));
xmlErrorPtr error = xmlGetLastError();
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(klass, error));
}
assert(ctx->doc);
assert(DOC_RUBY_OBJECT_TEST(ctx->doc));
switch(xpath->type) {
case XPATH_STRING:
thing = NOKOGIRI_STR_NEW2(xpath->stringval);
/* The wrapper is freed below without its payload, so free the string here. */
xmlFree(xpath->stringval);
break;
case XPATH_NODESET:
if(NULL == xpath->nodesetval) {
/* No node set in the result: hand back a fresh, empty one. */
thing = Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL),
DOC_RUBY_OBJECT(ctx->doc));
} else {
thing = Nokogiri_wrap_xml_node_set(xpath->nodesetval,
DOC_RUBY_OBJECT(ctx->doc));
}
break;
case XPATH_NUMBER:
thing = rb_float_new(xpath->floatval);
break;
case XPATH_BOOLEAN:
thing = xpath->boolval == 1 ? Qtrue : Qfalse;
break;
default:
/* Unhandled result types are reported as an empty node set. */
thing = Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL),
DOC_RUBY_OBJECT(ctx->doc));
}
/* Frees only the result wrapper; a wrapped node set stays alive. */
xmlXPathFreeNodeSetList(xpath);
return thing;
}
/*
 * call-seq:
 *  new(node)
 *
 * Create a new XPathContext with +node+ as the reference point. The
 * context is created for the document that owns +node+.
 *
 * Raises RuntimeError when libxml2 cannot create the context.
 */
static VALUE new(VALUE klass, VALUE nodeobj)
{
  xmlNodePtr node;
  xmlXPathContextPtr ctx;
  VALUE self;

  xmlXPathInit();

  Data_Get_Struct(nodeobj, xmlNode, node);

  ctx = xmlXPathNewContext(node->doc);
  if (ctx == NULL) {
    /* xmlXPathNewContext returns NULL on failure; do not dereference it. */
    rb_raise(rb_eRuntimeError, "Could not create an XPath context");
  }
  ctx->node = node;

  /* No mark function; deallocate() frees the context on collection. */
  self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
  /*rb_iv_set(self, "@xpath_handler", Qnil); */
  return self;
}
/* Ruby class object for Nokogiri::XML::XPathContext; assigned in init_xml_xpath_context(). */
VALUE cNokogiriXmlXpathContext;
/*
 * Define Nokogiri::XML::XPathContext and register its singleton
 * constructor and instance methods.
 */
void init_xml_xpath_context(void)
{
VALUE module = rb_define_module("Nokogiri");
/*
* Nokogiri::XML
*/
VALUE xml = rb_define_module_under(module, "XML");
/*
* XPathContext is the entry point for searching a Document by using XPath.
*/
VALUE klass = rb_define_class_under(xml, "XPathContext", rb_cObject);
cNokogiriXmlXpathContext = klass;
/* "new" is a singleton method: instances are built directly by new(). */
rb_define_singleton_method(klass, "new", new, 1);
/* -1: evaluate receives (argc, argv) and parses its own arguments. */
rb_define_method(klass, "evaluate", evaluate, -1);
rb_define_method(klass, "register_variable", register_variable, 2);
rb_define_method(klass, "register_ns", register_ns, 2);
}
nokogiri-1.6.1/ext/nokogiri/html_sax_push_parser.c 0000644 0001750 0001750 00000004175 12261213762 021746 0 ustar boutil boutil #include
/*
 * call-seq:
 *  native_write(chunk, last_chunk)
 *
 * Feed +chunk+ (a String, or nil for no data) to the HTML push parser.
 * Passing true as +last_chunk+ tells the parser that the input is
 * complete. Parse failures raise unless the context has the RECOVER
 * option set. Returns self.
 */
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
  xmlParserCtxtPtr parser_ctx;
  const char *data = NULL;
  int length = 0;
  int terminate;
  int status;

  Data_Get_Struct(self, xmlParserCtxt, parser_ctx);

  if (!NIL_P(_chunk)) {
    data = StringValuePtr(_chunk);
    length = (int)RSTRING_LEN(_chunk);
  }

  terminate = (_last_chunk == Qtrue) ? 1 : 0;
  status = htmlParseChunk(parser_ctx, data, length, terminate);

  if (status != 0 && (parser_ctx->options & XML_PARSE_RECOVER) == 0) {
    xmlErrorPtr last_error = xmlCtxtGetLastError(parser_ctx);
    Nokogiri_error_raise(NULL, last_error);
  }

  return self;
}
/*
 * call-seq:
 *  initialize_native(xml_sax, filename, encoding)
 *
 * Initialize the push parser with the SAX handler +xml_sax+, using
 * +filename+ (may be nil) as the name passed to libxml2. +encoding+ is an
 * encoding name resolved with xmlParseCharEncoding, or nil to leave the
 * encoding unspecified.
 *
 * Raises ArgumentError for an encoding libxml2 does not recognise and
 * RuntimeError when the parser context cannot be created.
 */
static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
VALUE encoding)
{
htmlSAXHandlerPtr sax;
const char * filename = NULL;
htmlParserCtxtPtr ctx;
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
if(_filename != Qnil) filename = StringValuePtr(_filename);
if (!NIL_P(encoding)) {
enc = xmlParseCharEncoding(StringValuePtr(encoding));
if (enc == XML_CHAR_ENCODING_ERROR)
rb_raise(rb_eArgError, "Unsupported Encoding");
}
/* No user data and no initial chunk: input arrives later via native_write. */
ctx = htmlCreatePushParserCtxt(
sax,
NULL,
NULL,
0,
filename,
enc
);
if(ctx == NULL)
rb_raise(rb_eRuntimeError, "Could not create a parser context");
/* SAX callbacks receive this tuple (context + Ruby parser) as user data. */
ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
ctx->sax2 = 1;
DATA_PTR(self) = ctx;
return self;
}
/* Ruby class object for Nokogiri::HTML::SAX::PushParser; assigned in init_html_sax_push_parser(). */
VALUE cNokogiriHtmlSaxPushParser;
/*
 * Define Nokogiri::HTML::SAX::PushParser as a subclass of the XML push
 * parser class and register the HTML-specific private native methods.
 */
void init_html_sax_push_parser()
{
VALUE nokogiri = rb_define_module("Nokogiri");
VALUE html = rb_define_module_under(nokogiri, "HTML");
VALUE sax = rb_define_module_under(html, "SAX");
/* The superclass is read from cNokogiriXmlSaxPushParser, so that global
 * must already have been assigned when this runs. */
VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
cNokogiriHtmlSaxPushParser = klass;
/* The HTML initialize_native takes three arguments (adds encoding). */
rb_define_private_method(klass, "initialize_native", initialize_native, 3);
rb_define_private_method(klass, "native_write", native_write, 2);
}
nokogiri-1.6.1/README.ja.rdoc 0000644 0001750 0001750 00000006426 12261213762 015127 0 ustar boutil boutil = Nokogiri (鋸) {
}[http://travis-ci.org/sparklemotion/nokogiri] {
}[https://codeclimate.com/github/sparklemotion/nokogiri]
* http://nokogiri.org/
* http://github.com/sparklemotion/nokogiri/wikis
* http://github.com/sparklemotion/nokogiri/tree/master
* http://groups.google.com/group/nokogiri-list
* http://github.com/sparklemotion/nokogiri/issues
== DESCRIPTION:
Nokogiri はHTMLとXMLとSAXとXSLTとReaderのパーサーです。とりわけ重要な特徴は、
ドキュメントをXPathやCSS3セレクター経由で探索する機能を持つことです。
XMLは暴力に似ている - XMLが君の問題を解決しないとしたら、君はXMLを十分に
使いこなしていない事になる。
== FEATURES:
* XPath による探索
* CSS3 のセレクターによる探索
* XML/HTMLのビルダー
XML/HTMLの高速な解析と探索、ならびにCSS3セレクターとXPathをサポートしています。
== SUPPORT:
日本語でNokogiriの
{メーリングリスト}[http://groups.google.com/group/nokogiri-list]
* http://groups.google.com/group/nokogiri-list
{バグ報告}[http://github.com/sparklemotion/nokogiri/issues]
* http://github.com/sparklemotion/nokogiri/issues
IRCのチャンネルはfreenodeの #nokogiri です。
== SYNOPSIS:
require 'nokogiri'
require 'open-uri'
doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
####
# Search for nodes by css
doc.css('h3.r a.l').each do |link|
puts link.content
end
####
# Search for nodes by xpath
doc.xpath('//h3/a[@class="l"]').each do |link|
puts link.content
end
####
# Or mix and match.
doc.search('h3.r a.l', '//h3/a[@class="l"]').each do |link|
puts link.content
end
== REQUIREMENTS:
* ruby 1.9.2 以上
* libxml2
* libxml2-dev
* libxslt
* libxslt-dev
== INSTALL:
* sudo gem install nokogiri
== LICENSE:
(The MIT License)
Copyright (c) 2008 - 2010:
* {Aaron Patterson}[http://tenderlovemaking.com]
* {Mike Dalessio}[http://mike.daless.io]
* {Charles Nutter}[http://blog.headius.com]
* {Sergio Arbeo}[http://www.serabe.com]
* {Patrick Mahoney}[http://polycrystal.org]
* {Yoko Harada}[http://yokolet.blogspot.com]
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
nokogiri-1.6.1/Rakefile 0000644 0001750 0001750 00000020405 12261213762 014366 0 ustar boutil boutil # -*- ruby -*-
# Load Hoe and activate the Hoe plugins this Rakefile relies on.
require 'rubygems'
gem 'hoe'
require 'hoe'
Hoe.plugin :debugging
Hoe.plugin :git
Hoe.plugin :gemspec
Hoe.plugin :bundler
Hoe.add_include_dirs '.'
# Output paths of the generated CSS parser and tokenizer (see the
# 'generate' task and the file tasks further down).
GENERATED_PARSER = "lib/nokogiri/css/parser.rb"
GENERATED_TOKENIZER = "lib/nokogiri/css/tokenizer.rb"
# 'ports' directory next to this Rakefile; passed as --with-zlib-dir when
# cross compiling.
CROSS_DIR = File.join(File.dirname(__FILE__), 'ports')
# True when the platform string of the running interpreter contains "java"
# (as it does on JRuby), false otherwise.
def java?
  RUBY_PLATFORM.include?('java')
end
ENV['LANG'] = "en_US.UTF-8" # UBUNTU 10.04, Y U NO DEFAULT TO UTF-8?
require 'tasks/nokogiri.org'
# Gem specification, built through Hoe. The result is kept in HOE so later
# sections can reach HOE.spec.
HOE = Hoe.spec 'nokogiri' do
developer 'Aaron Patterson', 'aaronp@rubyforge.org'
developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
developer 'Yoko Harada', 'yokolet@gmail.com'
developer 'Tim Elliott', 'tle@holymonkey.com'
# HLANG selects a localized README/CHANGELOG, e.g. HLANG=ja gives
# README.ja.rdoc; when unset, compact drops the nil and README.rdoc is used.
self.readme_file = ['README', ENV['HLANG'], 'rdoc'].compact.join('.')
self.history_file = ['CHANGELOG', ENV['HLANG'], 'rdoc'].compact.join('.')
self.extra_rdoc_files = FileList['*.rdoc','ext/nokogiri/*.c']
self.licenses = ['MIT']
self.clean_globs += [
'nokogiri.gemspec',
'lib/nokogiri/nokogiri.{bundle,jar,rb,so}',
'lib/nokogiri/{1.9,2.0}',
# GENERATED_PARSER,
# GENERATED_TOKENIZER
]
# Runtime dependencies.
self.extra_deps += [
["mini_portile", "~> 0.5.0"],
]
# Development-only dependencies.
self.extra_dev_deps += [
["hoe-bundler", ">= 1.1"],
["hoe-debugging", ">= 1.0.3"],
["hoe-gemspec", ">= 1.0"],
["hoe-git", ">= 1.4"],
["minitest", "~> 2.2.2"],
["rake", ">= 0.9"],
["rake-compiler", "~> 0.8.0"],
["racc", ">= 1.4.6"],
["rexical", ">= 1.0.5"]
]
# The java gem only sets its platform; every other build declares the
# native extension and a minimum Ruby version.
if java?
self.spec_extras = { :platform => 'java' }
else
self.spec_extras = {
:extensions => ["ext/nokogiri/extconf.rb"],
:required_ruby_version => '>= 1.9.2'
}
end
self.testlib = :minitest
end
# ----------------------------------------
# Hard-link +relative_path+ into the gem build directory (creating the
# destination directory and replacing any existing file) and append it to
# the gem specification's file list.
def add_file_to_gem(relative_path)
  destination = File.join(gem_build_path, relative_path)
  destination_dir = File.dirname(destination)

  mkdir_p(destination_dir) unless File.directory?(destination_dir)
  rm_f(destination)
  ln(relative_path, destination)

  HOE.spec.files += [relative_path]
end
# Directory under pkg/ named after the gem's full name, where the gem
# contents are staged.
def gem_build_path
  File.join('pkg', HOE.spec.full_name)
end
# Extension build setup: a Java extension task on JRuby, otherwise a native
# extension task with optional cross compilation for Windows.
if java?
  # TODO: clean this section up.
  require "rake/javaextensiontask"
  Rake::JavaExtensionTask.new("nokogiri", HOE.spec) do |ext|
    jruby_home = RbConfig::CONFIG['prefix']
    ext.ext_dir = 'ext/java'
    ext.lib_dir = 'lib/nokogiri'
    jars = ["#{jruby_home}/lib/jruby.jar"] + FileList['lib/*.jar']
    ext.classpath = jars.map { |x| File.expand_path x }.join ':'
  end

  task gem_build_path => [:compile] do
    add_file_to_gem 'lib/nokogiri/nokogiri.jar'
  end
else
  mingw_available = true
  begin
    require 'tasks/cross_compile'
  rescue StandardError, LoadError
    # LoadError is a ScriptError, not a StandardError, so a bare `rescue`
    # would let a failing require abort the whole Rakefile instead of
    # downgrading to a warning.
    puts "WARNING: cross compilation not available: #{$!}"
    mingw_available = false
  end
  require "rake/extensiontask"

  # Java sources and jars do not belong in the native gem.
  HOE.spec.files.reject! { |f| f =~ %r{\.(java|jar)$} }

  windows_p = RbConfig::CONFIG['target_os'] == 'mingw32' || RbConfig::CONFIG['target_os'] =~ /mswin/

  unless windows_p || java?
    # Ship dependencies.yml and the libxml2/libxslt source archives it
    # names inside the gem.
    task gem_build_path do
      add_file_to_gem "dependencies.yml"

      dependencies = YAML.load_file("dependencies.yml")
      %w[libxml2 libxslt].each do |lib|
        version = dependencies[lib]
        archive = File.join("ports", "archives", "#{lib}-#{version}.tar.gz")
        add_file_to_gem archive
      end
    end
  end

  Rake::ExtensionTask.new("nokogiri", HOE.spec) do |ext|
    # FAT_DIR, when set, adds a subdirectory under lib/nokogiri.
    ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact)
    ext.config_options << ENV['EXTOPTS']
    if mingw_available
      ext.cross_compile = true
      ext.cross_platform = ["x86-mswin32-60", "x86-mingw32"]
      ext.cross_config_options << "--with-xml2-include=#{File.join($recipes["libxml2"].path, 'include', 'libxml2')}"
      ext.cross_config_options << "--with-xml2-lib=#{File.join($recipes["libxml2"].path, 'lib')}"
      ext.cross_config_options << "--with-iconv-dir=#{$recipes["libiconv"].path}"
      ext.cross_config_options << "--with-xslt-dir=#{$recipes["libxslt"].path}"
      ext.cross_config_options << "--with-zlib-dir=#{CROSS_DIR}"
    end
  end
end
# ----------------------------------------
# Umbrella task for the two generated CSS files; the description names the
# outputs (GENERATED_PARSER and GENERATED_TOKENIZER).
desc "Generate css/parser.rb and css/tokenizer.rb"
task 'generate' => [GENERATED_PARSER, GENERATED_TOKENIZER]
# Make gem:spec depend on the generated files when that task exists.
task 'gem:spec' => 'generate' if Rake::Task.task_defined?("gem:spec")
# This is a big hack to make sure that the racc and rexical
# dependencies in the Gemfile are constrained to ruby platforms
# (i.e. MRI and Rubinius). There's no way to do that through hoe,
# and any solution will require changing hoe and hoe-bundler.
#
# The task rewrites the Gemfile in place: every line mentioning racc or
# rexical is stripped and gets ", :platform => :ruby" appended; all other
# lines are written back unchanged.
# NOTE(review): old_gemfile_task is looked up before the `task` call below,
# which adds this block to the task of the same name; presumably the
# original action has already run by the time this block executes -- confirm.
old_gemfile_task = Rake::Task['bundler:gemfile'] rescue nil
task 'bundler:gemfile' do
old_gemfile_task.invoke if old_gemfile_task
lines = File.open('Gemfile', 'r') { |f| f.readlines }.map do |line|
line =~ /racc|rexical/ ? "#{line.strip}, :platform => :ruby" : line
end
File.open('Gemfile', 'w') { |f| lines.each { |line| f.puts line } }
end
# Regenerate the CSS parser from its racc grammar.
# The racc executable is taken from `which racc` (skipped on mswin32),
# falling back to "<bindir>/racc" of the running Ruby.
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
racc = RbConfig::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
racc = "#{::RbConfig::CONFIG['bindir']}/racc" if racc.empty?
# After the bindir fallback above racc always ends in "/racc" and is never
# empty, so the `command -v` lookup below cannot run.
racc = %x{command -v racc}.strip if racc.empty?
sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
end
# Regenerate the CSS tokenizer from its rex definition with the `rex` command.
file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
sh "rex --independent -o #{t.name} #{t.prerequisites.first}"
end
# Compiling and manifest checking both require the generated files.
[:compile, :check_manifest].each do |task_name|
Rake::Task[task_name].prerequisites << GENERATED_PARSER
Rake::Task[task_name].prerequisites << GENERATED_TOKENIZER
end
# ----------------------------------------
# Switch on debug builds: sets NOKOGIRI_DEBUG and appends -DDEBUG to any
# CFLAGS already present in the environment.
desc "set environment variables to build and/or test with debug options"
task :debug do
  ENV['NOKOGIRI_DEBUG'] = "true"
  ENV['CFLAGS'] = "#{ENV['CFLAGS']} -DDEBUG"
end
require 'tasks/test'
# On JRuby, when JAVA_DEBUG is set, put JDWP debugger options (socket
# transport, port 8000, suspend on start) into JAVA_OPTS.
task :java_debug do
ENV['JAVA_OPTS'] = '-Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y' if java? && ENV['JAVA_DEBUG']
end
# JRuby only: test_18 is the plain test task, test_19 sets JRUBY_OPTS to
# "--1.9" before invoking it.
if java?
task :test_18 => :test
task :test_19 do
ENV['JRUBY_OPTS'] = "--1.9"
Rake::Task["test"].invoke
end
end
# Tests need a compiled extension; check_extra_deps is skipped on JRuby.
Rake::Task[:test].prerequisites << :compile
Rake::Task[:test].prerequisites << :java_debug
Rake::Task[:test].prerequisites << :check_extra_deps unless java?
# The valgrind test tasks come from the hoe debugging plugin and also need
# a compiled extension.
if Hoe.plugins.include?(:debugging)
['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
Rake::Task["test:#{task_name}"].prerequisites << :compile
end
end
# ----------------------------------------
# Build the Windows gem for every Ruby in cross_rubies. Before building, the
# task checks that rake-compiler has a cross ruby of the expected patchlevel
# for each version and that each rbconfig links with --export-all.
desc "build a windows gem without all the ceremony."
task "gem:windows" => "gem" do
  cross_rubies = ["1.9.3-p194", "2.0.0-p0"]
  ruby_cc_version = cross_rubies.map { |version| version.split("-").first }.join(":") # e.g., "1.8.7:1.9.2"
  rake_compiler_config_path = "#{ENV['HOME']}/.rake-compiler/config.yml"

  # File.exist? rather than the deprecated File.exists? (removed in Ruby 3.2).
  unless File.exist? rake_compiler_config_path
    raise "rake-compiler has not installed any cross rubies. try running 'env --unset=HOST rake-compiler cross-ruby VERSION=#{cross_rubies.first}'"
  end
  rake_compiler_config = YAML.load_file(rake_compiler_config_path)

  # check that rake-compiler config contains the right patchlevels. see #279 for background,
  # and http://blog.mmediasys.com/2011/01/22/rake-compiler-updated-list-of-supported-ruby-versions-for-cross-compilation/
  # for more up-to-date docs.
  cross_rubies.each do |version|
    majmin, patchlevel = version.split("-")
    rbconfig = "rbconfig-#{majmin}"
    unless rake_compiler_config.key?(rbconfig) && rake_compiler_config[rbconfig] =~ /-#{patchlevel}/
      raise "rake-compiler '#{rbconfig}' not #{patchlevel}. try running 'env --unset=HOST rake-compiler cross-ruby VERSION=#{version}'"
    end
  end

  # verify that --export-all is in every cross ruby's rbconfig. see #279,#374,#375.
  # Runs after the patchlevel loop so each rbconfig key is known to exist.
  cross_rubies.each do |version|
    rbconfig_path = rake_compiler_config["rbconfig-#{version.split('-').first}"]
    if File.read(rbconfig_path).split("\n").grep(/CONFIG\["DLDFLAGS"\].*--export-all/).empty?
      raise "rbconfig #{rbconfig_path} needs --export-all in its DLDFLAGS value"
    end
  end

  pkg_config_path = %w[libxslt libxml2].map { |pkg| File.join($recipes[pkg].path, "lib/pkgconfig") }.join(":")
  sh("env PKG_CONFIG_PATH=#{pkg_config_path} RUBY_CC_VERSION=#{ruby_cc_version} rake cross native gem") || raise("build failed!")
end
# vim: syntax=Ruby
nokogiri-1.6.1/bin/ 0000755 0001750 0001750 00000000000 12261213762 013470 5 ustar boutil boutil nokogiri-1.6.1/bin/nokogiri 0000755 0001750 0001750 00000003301 12261213762 015234 0 ustar boutil boutil #!/usr/bin/env ruby
require 'optparse'
require 'open-uri'
require 'irb'
require 'uri'
require 'rubygems'
require 'nokogiri'
# Defaults that the command-line switches below may override.
parse_class = Nokogiri
encoding = nil

# Command-line definition. Handlers record their results in the locals
# above or in @script / @rng for the code that follows.
opts = OptionParser.new do |parser|
  parser.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
  parser.define_head "Usage: nokogiri [options]"
  parser.separator ""
  parser.separator "Examples:"
  parser.separator " nokogiri http://www.ruby-lang.org/"
  parser.separator " nokogiri ./public/index.html"
  parser.separator " curl -s http://nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'"
  parser.separator ""
  parser.separator "Options:"

  # Choose the parser explicitly; nil when --type is given without a value.
  parser.on("--type [TYPE]", [:xml, :html]) do |type|
    parse_class =
      case type
      when :xml then Nokogiri::XML
      when :html then Nokogiri::HTML
      end
  end

  parser.on("-E", "--encoding encoding", "Read as encoding (default #{encoding})") do |name|
    encoding = name
  end

  parser.on("-e command", "Specifies script from command-line.") do |command|
    @script = command
  end

  parser.on("--rng ", "Validate using this rng file.") do |location|
    @rng = open(location) { |io| Nokogiri::XML::RelaxNG(io) }
  end

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-v", "--version", "Show version") do
    puts Nokogiri::VersionInfo.instance.to_markdown
    exit
  end
end
opts.parse!
# Main flow: pick the document source, parse it, then validate against the
# RelaxNG schema, run the -e script, or drop into IRB.
uri = ARGV.shift
uri_given = !uri.to_s.strip.empty?

# Nothing to parse: no URI/path argument and nothing piped in on stdin.
if !uri_given && $stdin.tty?
  puts opts
  exit 1
end

# An explicit URI/path always wins; stdin is only read when none was given.
# (Deciding on $stdin.tty? alone ignored the argument whenever stdin was not
# a terminal, e.g. under cron or with redirected input.)
if uri_given
  # NOTE: with open-uri loaded, Kernel#open accepts URLs and paths, and also
  # treats a leading "|" as a command to run. The value comes from the
  # invoking user's own command line.
  # The block form closes the handle once the content has been read.
  @doc = open(uri) { |io| parse_class.parse(io.read, nil, encoding) }
else
  @doc = parse_class.parse($stdin, nil, encoding)
end

# Expose the document as $_ for -e one-liners.
$_ = @doc

if @rng
  @rng.validate(@doc).each do |error|
    puts error.message
  end
elsif @script
  eval @script, binding, ''
else
  puts "Your document is stored in @doc..."
  IRB.start
end
nokogiri-1.6.1/checksums.yaml.gz 0000444 0001750 0001750 00000000417 12261213762 016210 0 ustar boutil boutil ~Re+VDAm`8I:g0Q,67uxHr=ZδPWiu
C
-,Y :xt.Mbsȶ;P^D1Rf!BJ-sR0J,><,dz1*t?(t)b4ZZ[ׂ)n
;v
DZWXh~=qs