XML/ 0000755 0001760 0000144 00000000000 13611264207 010744 5 ustar ripley users XML/NAMESPACE 0000644 0001760 0000144 00000024256 13611134725 012175 0 ustar ripley users # Avoided if we are using Windows so that we can specify the directory to find the libxml DLL.
# useDynLib(XML)
importFrom(utils, menu)
importFrom(grDevices, dev.off, jpeg, pdf, png)
import(methods)
# Main export list: the plain functions the package makes visible to users.
# A few S3 methods are also exported here under their full names
# (e.g. append.XMLNode, toString.XMLNode, names.XMLNode, xmlSize.default).
# Entries commented out inside the call are currently not exported.
export(
append.xmlNode,
append.XMLNode,
Doctype,
asXMLNode,
comment.SAX,
dtdElement,
dtdElementValidEntry,
dtdEntity,
dtdIsAttribute,
dtdValidElement,
endElement.SAX,
entityDeclaration.SAX,
genericSAXHandlers,
getNodeSet,
xpathApply,
htmlTreeParse,
htmlParse,
libxmlVersion,
xmlDoc,
newHTMLDoc,
newXMLDoc,
newXMLNode,
newXMLNamespace,
newXMLPINode,
newXMLTextNode,
newXMLCommentNode,
newXMLCDataNode,
newXMLDTDNode,
parseDTD,
processingInstruction.SAX,
saveXML,
startElement.SAX,
supportsExpat,
supportsLibxml,
text.SAX,
toString.XMLNode,
xmlApply,
xmlAttributeType,
xmlAttrs,
"xmlAttrs<-",
xmlCDataNode,
xmlChildren,
xmlCommentNode,
xmlContainsElement,
xmlContainsEntity,
xmlDOMApply,
xmlElementsByTagName,
xmlEventHandler,
xmlEventParse,
# new.xmlEventParse,
# new.xmlTreeParse,
xmlGetAttr,
xmlHandler,
xmlName,
xmlNamespace,
xmlNode,
xmlOutputBuffer,
xmlOutputDOM,
xmlPINode,
xmlParent,
xmlAncestors,
xmlRoot,
xmlSApply,
xmlSize,
xmlSize.default,
xmlTextNode,
xmlTree,
xmlTreeParse,
xmlInternalTreeParse,
xmlNativeTreeParse,
xmlParse,
xmlValue,
names.XMLNode,
parseURI,
asXMLTreeNode,
xmlHashTree,
addNode,
xmlNamespaceDefinitions,
xmlNamespaces,
matchNamespaces,
getDefaultNamespace,
catalogResolve,
toHTML,
addChildren,
removeChildren,
removeNodes,
addAttributes,
removeAttributes,
"xmlName<-",
addSibling
# xmlFlatListTree
)
# S3 methods for the removeNodes generic, one per container/node class.
S3method(removeNodes, "list")
S3method(removeNodes, "XMLNodeSet")
S3method(removeNodes, "XMLNodeList")
S3method(removeNodes, "XMLInternalNode")
# S4 methods for attribute manipulation and HTML conversion.
exportMethods("addAttributes", "removeAttributes")
exportMethods("toHTML")
# The replacement function xmlChildren<- is exported both as a function
# and through its S4 methods.
export("xmlChildren<-")
exportMethods("xmlChildren<-")
# S4 classes for documents and URIs.
exportClasses("XMLInternalDocument", "XMLAbstractDocument")
exportClass("URI")
# Node-related S4 class exports. The condition is the constant TRUE, so
# every directive in the braces is active; the if() wrapper only serves as
# a switch that can be flipped by editing this file.
if(TRUE) {
exportClasses("XMLAbstractNode", "XMLNode")
exportClasses("HTMLInternalDocument")
# Classes for the individual kinds of internal (C-level) nodes.
exportClasses("XMLInternalNode", "XMLInternalElementNode",
"XMLInternalTextNode", "XMLInternalPINode", "XMLInternalCDataNode",
"XMLInternalCommentNode", "XMLDTDNode",
"XMLXIncludeStartNode", "XMLXIncludeEndNode", "XMLEntityDeclNode",
"XMLAttributeDeclNode", "XMLDocumentNode", "XMLDocumentTypeNode",
"XMLDocumentFragNode", "XMLNamespaceDeclNode")
exportClass("XMLTreeNode")
# Namespace classes (names given unquoted here).
exportClass(XMLNamespace)
exportClass(XMLNamespaceDefinitions)
}
# S4 methods for coercion (as()) and for free().
exportMethods("coerce")
exportMethods("free")
# S3 methods for removeChildren.
S3method(removeChildren, XMLNode)
S3method(removeChildren, XMLInternalNode)
# Classes named in the ChangeLog as colClasses values for readHTMLTable()
# (formatted numbers and percentages).
exportClasses(Percent, FormattedNumber, FormattedInteger)
# S3 methods for xpathApply; xpathSApply is exported as a plain function.
S3method(xpathApply, XMLInternalNode)
S3method(xpathApply, XMLInternalDocument)
S3method(xpathApply, XMLNode)
export(xpathSApply)
# S3 methods for xmlNamespaceDefinitions.
S3method(xmlNamespaceDefinitions, XMLNode)
S3method(xmlNamespaceDefinitions, XMLInternalDocument)
S3method(xmlNamespaceDefinitions, XMLInternalNode)
S3method(xmlNamespaceDefinitions, XMLAbstractDocument)
# The next three registrations are disabled.
#XXX S3method(xmlNamespaceDefinitions, XMLHashTreeNode)
#S3method(names, XMLFlatTree)
#S3method("$", XMLFlatListTree)
# S3 methods for XMLHashTree objects.
S3method(addNode, XMLHashTree)
S3method(xmlRoot, XMLHashTree)
S3method(print, XMLHashTree)
# print methods for internal documents/nodes and R-level documents.
S3method(print, XMLInternalDocument)
S3method(print, XMLInternalNode)
S3method(print, XMLRDocument)
# xmlRoot methods for R-level XML and HTML documents.
S3method(xmlRoot, XMLRDocument)
S3method(xmlRoot, HTMLDocument)
# The if/else groups below choose between S3 registrations and an S4
# exportMethods() directive for the same generic. The constant condition
# decides which branch takes effect.

# xmlName: S3 methods are active, the S4 export in the else branch is not.
if(TRUE) {
S3method(xmlName, XMLComment)
S3method(xmlName, XMLNode)
S3method(xmlName, XMLInternalNode)
} else
# S4 version
exportMethods(xmlName)
# Replacement methods for names<- and xmlName<-.
S3method("names<-", XMLNode)
S3method("xmlName<-", XMLNode)
S3method("xmlName<-", XMLInternalElementNode)
exportMethods("xmlAttrs<-")
# xmlChildren: S3 methods are active, the S4 export is not.
if(TRUE) {
S3method(xmlChildren, XMLTreeNode)
S3method(xmlChildren, XMLInternalDocument)
S3method(xmlChildren, XMLHashTreeNode)
S3method(xmlChildren, XMLNode)
S3method(xmlChildren, XMLInternalNode)
} else
exportMethods("xmlChildren")
# xmlParent: the condition is FALSE, so the S3 registrations are skipped
# and the S4 methods are exported instead.
if(FALSE) {
S3method(xmlParent, XMLTreeNode)
S3method(xmlParent, XMLHashTreeNode)
S3method(xmlParent, XMLInternalNode)
} else
exportMethods(xmlParent)
# S3 methods for xmlSize and xmlRoot on hash-tree and internal objects.
# xmlRoot for XMLHashTree is not repeated here: it is already registered
# earlier in this file, next to the addNode and print methods for
# XMLHashTree, and a second identical S3method() directive adds nothing.
S3method(xmlSize, XMLHashTreeNode)
S3method(xmlSize, XMLHashTree)
S3method(xmlRoot, XMLInternalDOM)
S3method(xmlRoot, XMLInternalNode)
# S3 methods for addChildren.
S3method(addChildren, XMLInternalNode)
S3method(addChildren, XMLInternalDocument)
S3method(addChildren, XMLNode)
# replaceNodes: exported generic plus its method for internal nodes.
export(replaceNodes)
S3method(replaceNodes, XMLInternalNode)
S3method(xmlSize, XMLInternalNode)
# xmlValue methods for internal nodes, node sets, lists and NULL.
S3method(xmlValue, XMLInternalNode)
S3method(xmlValue, XMLNodeSet)
S3method(xmlValue, list)
#exportS3method("xmlValue", "NULL")
S3method("xmlValue", "NULL")
# Subsetting operators. The "[[" group is wrapped in if(TRUE) and is
# therefore always registered.
S3method("[", XMLNode)
if(TRUE) {
S3method("[[", XMLNode)
S3method("[[", XMLDocumentContent)
S3method("[[", XMLInternalNode)
S3method("[[", XMLInternalDocument)
S3method("[[", XMLHashTreeNode)
S3method("[[", XMLInternalElementNode)
}
S3method("[", XMLInternalNode)
S3method("[", XMLInternalDocument)
S3method("names", XMLInternalNode)
# Assignment forms of the subsetting operators.
S3method("[<-", XMLNode)
S3method("[[<-", XMLNode)
S3method("[[<-", XMLInternalNode)
exportClass("XMLAttributes")
exportMethods("[")
# xmlNamespaceDefinitions is also named in the main export() list.
export(xmlNamespaceDefinitions)
S3method(names, XMLNode)
S3method(length, XMLNode)
# xmlAttrs: S3 methods are active (condition is TRUE); the S4 export in
# the else branch is not.
if(TRUE) {
S3method(xmlAttrs, XMLNode)
S3method(xmlAttrs, XMLInternalNode)
S3method(xmlAttrs, XMLElementDef)
} else
exportMethods("xmlAttrs")
# xmlSize methods for R-level documents/nodes and the default.
S3method(xmlSize, XMLDocument)
S3method(xmlSize, default)
S3method(xmlSize, XMLNode)
# print methods for the R-level node classes.
S3method(print, XMLNode)
S3method(print, XMLTextNode)
S3method(print, XMLComment)
S3method(print, XMLCommentNode)
S3method(print, XMLEntityRef)
S3method(print, XMLCDataNode)
S3method(print, XMLProcessingInstruction)
# xmlRoot methods for documents.
S3method(xmlRoot, XMLDocument)
S3method(xmlRoot, XMLInternalDocument)
S3method(xmlRoot, XMLDocumentContent)
# xmlApply / xmlSApply methods.
S3method(xmlApply, XMLNode)
S3method(xmlApply, XMLDocument)
S3method(xmlApply, XMLDocumentContent)
S3method(xmlApply, XMLInternalNode)
S3method(xmlSApply, XMLNode)
S3method(xmlSApply, XMLDocument)
S3method(xmlSApply, XMLDocumentContent)
S3method(xmlSApply, XMLInternalNode)
S3method(xmlSApply, XMLNodeSet)
S3method(xmlApply, XMLNodeSet)
# xmlValue for R-level nodes: S3 methods are active, the S4 export is not.
if(TRUE) {
S3method(xmlValue, XMLNode)
S3method(xmlValue, XMLTextNode)
S3method(xmlValue, XMLComment)
S3method(xmlValue, XMLCDataNode)
S3method(xmlValue, XMLProcessingInstruction)
} else
exportMethods("xmlValue")
S3method(addSibling, XMLInternalNode)
# xmlNamespace methods for nodes and character input.
S3method(xmlNamespace, XMLNode)
S3method(xmlNamespace, XMLInternalNode)
S3method(xmlNamespace, character)
# S3 methods for append.
# No effect if append is not generic.
S3method(append, XMLNode)
S3method(append, default)
# saveXML is exported through its S4 methods; the S3 registrations below
# are disabled (the ChangeLog records saveXML() becoming an S4 generic).
exportMethods(saveXML)
# S3method(saveXML, XMLInternalDocument)
# S3method(saveXML, XMLInternalDOM)
# S3method(saveXML, XMLInternalNode)
# S3method(saveXML, XMLOutputStream)
# S3method(saveXML, XMLNode)
# S3method(saveXML, XMLFlatTree)
# S3 methods for dtdElementValidEntry, one per DTD content class.
S3method(dtdElementValidEntry, XMLElementDef)
S3method(dtdElementValidEntry, XMLOrContent)
S3method(dtdElementValidEntry, XMLElementContent)
S3method(dtdElementValidEntry, character)
S3method(dtdElementValidEntry, XMLSequenceContent)
# docName: the condition is FALSE, so the S3 registrations are skipped and
# the S4 methods are exported (the ChangeLog records docName() becoming an
# S4 generic).
export(docName)
if(FALSE) {
S3method(docName, XMLDocument)
S3method(docName, XMLDocumentContent)
S3method(docName, XMLInternalDocument)
S3method(docName, XMLInternalNode)
S3method(docName, XMLHashTree)
} else
exportMethods(docName)
# Replacement functions exported together with their S4 methods.
export("xmlNamespaces<-")
exportMethods("xmlNamespaces<-")
export("docName<-")
exportMethods("docName<-")
exportClass("SAXState")
# xmlSource family: each generic is exported along with its S4 methods.
export(xmlSource)
exportMethods("xmlSource")
export(xmlSourceFunctions)
exportMethods("xmlSourceFunctions")
export(xmlSourceSection)
exportMethods("xmlSourceSection")
# Not yet exported....
#xmlValidity
# XML Schema support: reference/table classes, validation functions and
# the S4 methods used with them. Always active (condition is TRUE).
if(TRUE) {
exportClasses("ExternalReference", "xmlSchemaRef", "libxmlTypeTable")
exportClasses("SchemaElementTable", "xmlSchemaElementRef",
"SchemaTypeTable", "xmlSchemaTypeRef",
"SchemaAttributeTable", "xmlSchemaAttributeRef",
"SchemaAttributeGroupTable", "xmlSchemaAttributeGroupRef",
"SchemaNotationTable", "xmlSchemaNotationRef")
export(xmlSchemaValidate, schemaValidationErrorHandler, xmlSchemaParse)
exportMethods("names", "$", "$<-", "coerce")
}
# Disabled import; the ChangeLog notes normalizePath() moved from utils to
# base in R 2.13.
#importFrom(utils, normalizePath)
# getSibling generic and its S3 methods.
export(getSibling)
S3method(getSibling, XMLInternalNode)
S3method(getSibling, XMLHashTreeNode)
# Catalog functions.
export(catalogLoad, catalogClearTable, catalogAdd, catalogDump)
# Error-handling helpers for the parser.
export(xmlStructuredStop, xmlErrorCumulator) # xmlStop
export(xmlStopParser)
export(getXMLErrors)
# processXInclude generic and its S3 methods.
export(processXInclude)
S3method(processXInclude, list)
S3method(processXInclude, XMLInternalDocument)
S3method(processXInclude, XMLInternalElementNode)
exportMethods(show)
export(xmlElementSummary)
#, xmlElementSummaryHandlers)
#export(xmlNodeMatch)
#export(getRCode)
export(xmlParserContextFunction)
export(getRelativeURL)
export(xmlToList)
# xmlValue<- and getEncoding are exported with their S4 methods.
export('xmlValue<-')
exportMethods('xmlValue<-')
export(getEncoding)
exportMethods(getEncoding)
# Classes and functions for XML documents that contain code.
exportClass("XMLCodeFile")
exportClass("XMLCodeDoc")
exportMethods("[[")
export(xmlCodeFile)
exportMethods(source)
# Cloning, XInclude lookup, node location and namespace helpers.
export(xmlClone)
export(findXInclude)
export(getLineNumber, getNodeLocation, getNodePosition)
export(ensureNamespace)
export(removeXMLNamespaces)
exportMethods(removeXMLNamespaces)
export(xmlParseDoc)
# Parser option constants. The ChangeLog describes these as the options
# that can be combined in xmlParseDoc() to control the parser (e.g.
# entity replacement, XInclude processing, DTD loading, error recovery).
export(RECOVER,
NOENT,
DTDLOAD,
DTDATTR,
DTDVALID,
NOERROR,
NOWARNING,
PEDANTIC,
NOBLANKS,
SAX1,
XINCLUDE,
NONET,
NODICT,
NSCLEAN,
NOCDATA,
NOXINCNODE,
COMPACT,
OLD10,
NOBASEFIX,
HUGE,
OLDSAX)
# Query of the features available in libxml2.
export(libxmlFeatures)
# XML-as-string support.
exportClass("XMLString")
export(xml, xmlParseString, isXMLString)
# Converters from HTML/XML to R data structures.
export(readHTMLTable)
export(xmlToS4, makeClassTemplate) # xmlToS4List
exportMethods("xmlToS4")
export(xmlToDataFrame)
exportMethods(xmlToDataFrame)
export(compareXMLDocs)
S3method(summary, XMLInternalDocument)
export(parseXMLAndAdd)
#exportClass("XPathNodeSet")
export(xmlSerializeHook, xmlDeserializeHook)
# Disabled (condition is FALSE): clearMemoryManagement is not exported.
if(FALSE) {
export(clearMemoryManagement)
exportMethods(clearMemoryManagement)
}
# Parent and namespace setters/lookups.
export("xmlParent<-")
export(xmlSearchNs)
export("xmlNamespace<-", setXMLNamespace)
# Readers for specific document types and HTML helpers.
export(readKeyValueDB, readSolrDoc)
export(getChildrenStrings)
export(getHTMLLinks)
export(readHTMLList)
export(getXIncludes, xmlXIncludes)
export(getHTMLExternalFiles)
export(xmlCleanNamespaces)
export(replaceNodeWithChildren)
# toString method for XMLNode (toString.XMLNode is also in the main
# export() list).
S3method(toString, XMLNode)
XML/README 0000644 0001760 0000144 00000000774 13607624377 011651 0 ustar ripley users The Packages/ directory has some package source tar.gz files.
See index.html for a description of the package and the installation
procedures.
This R package is not in the R package format in the github repository.
It was initially developed in 1999 and was intended for use in both
S-Plus and R and so requires a different structure for each.
make ADMIN=1
copies the files to an appropriate structure for R.
It currently requires some supporting tools from the Omegahat
admin facilities.
XML/LICENSE 0000644 0001760 0000144 00000000175 13610021133 011740 0 ustar ripley users YEAR: 2015
COPYRIGHT HOLDER: Duncan Temple Lang, Bell Labs, Lucent Technologies, University of California, Davis; CRAN Team
XML/ChangeLog 0000644 0001760 0000144 00000156422 13610045535 012530 0 ustar ripley users Version 3.99-0
* We can specify R functions and C routines for use as XPath
functions in calls to getNodeSet() and xpathApply().
* Implementations of XPath 2.0 functions matches(), lower-case(),
ends-with(), abs(), min(), max(), replace()
Version 3.98-2
* xmlSave() of a document to a file with encoding now honors indenting.
Uses xmlSaveFormatFileEnc(). Issue identified by Earl Brown.
Version 3.98-1
* xmlToS4() handles attributes with namespace prefixes and children
with the same node name.
* Compilation error with clang. Simple declaration of a routine.
* xmlXIncludes() added.
* Changes to simplifyPath().
Version 3.98-0
* Update for libxml2-2.9.1 and reading from a connection for xmlEventParse().
* xmlXIncludes() is a hierarchical version of getXIncludes()
* Modifications to xmlSource(), e.g. verbose = TRUE as default.
Version 3.97-0
* Fix for xmlValue(node) = text. Identified by Lawrence Edwards.
Uses xmlNodeSetContent() now and leaves freeing the original content to that routine.
* Updates for xmlSource()
Version 3.96-1
* readHTMLTable() ignores headers that are over 999 characters.
* Fix a problem in readHTMLTable() with some table headers not having
the correct number of elements to match the columns.
Version 3.96-0
* Introduced readHTMLList(), getHTMLLinks(), getHTMLExternalFiles(), getXIncludes().
* When serializing XMLNode objects, i.e. R representations of nodes, ensure " and <, etc. in attributes
are serialized correctly.
Version 3.95-1
* Allow htmlParse(), xmlParse(), etc. ?
Version 3.95-0
* Moved development version of the source code for the package to github -
https://github.com/omegahat/XML.git
* Changes to the structure of the package to allow installation directly rather than
via a one-step staging into the R package structure.
* Sample XML documents moved from data/ to exampleData, and examples updated.
* getDefaultNamespace() and matchNamespaces() use simplify = TRUE to call
xmlNamespaceDefinitions() to get the namespaces as a character vector rather than
list.
* Documentation updates
Version 3.94-0
* getNodeLocation() now reports the actual line number for text nodes rather than 0,
using the sibling nodes' or parent node's line number.
* xpathApply() and related functions work with builtin type "functions",
e.g. class.
* xpathApply() and related functions (getNodeSet, xpathSApply) allow
the caller to specify multiple queries as a character vector
and these are pasted together as compound location paths by
separating them with a '|'. This makes it easier for the
caller to manage the different queries.
* assigning to a child of a node works, e.g. node[["abc"]] = text/node
and node[[index]] = text/node. We replace a matching name. If the
replacement value is text, we use the name to
* getChildrenStrings() is a function that implements the equivalent of
xmlApply(node, xmlValue) but faster because we avoid the function call
for each element.
* options parameter for xmlParse() and htmlParse() for controlling the parser.
(Currently only used when encoding is explicitly specified.)
* encoding parameter for xmlParse() and xmlTreeParse() now works for XML documents,
not just HTML documents.
* Update for readHTMLTable() method so that we look at just the final
<tr> node
in a <thead>.
Version 3.93-1
* Fixed bug in findXInclude() that sometimes got the wrong XMLXIncludeStartNode.
Hence getNodeLocation() might report the wrong file, but correct line number!
* findXInclude() now has a recursive parameter that resolves the chain of XIncludes.
This returns the full path to the file, relative to the base/top-level document,
not just the parent document.
* Change to the default value of the error parameter in htmlParse() and htmlTreeParse()
which will generate a structured R error if there is an IO error.
The set of issues that will raise an error will be broadened in the future.
Version 3.93-0
* Enabled the fixing of namespaces by finding the definition
for that prefix in the ancestor nodes.
Version 3.92-2
* Synchronized compilation flags for Windows with those on OSX & Linux.
Version 3.92-1
* Restore original error handler function for htmlParse() and htmlTreeParse()
* Fixed a reference counting problem caused by not adding a finalizer in the
as() method for coercing an XMLInternalNode to an XMLInternalDocument.
Example from Janko Thyson.
* Fixed up some partial argument names found by R CMD check!
Version 3.92-0
* Added --enable-xml-debug option for the configure script and this activates
the debugging diagnostic reporting, mainly for the garbage collection and node
reference counts.
* Work-around for HTML documents not being freed (but XML documents are!)
* Added an isHTML parameter for xmlTreeParse.
* Merge htmlTreeParse/htmlParse with xmlTreeParse.
* Implemented some diagnostic facilities to determine if an external pointer
is in R's weak references list. This needs support within R. (Ask for code if
you want.)
Version 3.91-0
* Start of implementation to allow nested calls to newXMLNode() to use namespace prefixes
defined in ancestor nodes. Disabled at present.
Version 3.9-4
* readHTMLTable() passes the encoding to the cell function.
* xmlValue() and saveXML() use the encoding from the document, improving conversion of strings.
* More methods for getEncoding()
Version 3.9-3
* getEncoding() returns NA when the encoding is not known. Previously, this might seg-fault!
* readHTMLTable() passes an encoding argument to the call to xmlValue (and the value of elFun).
Version 3.9-2
* Static NAMESPACE (rather than generated via configure)
* Default for directory in Makevars.win to search for header files and libraries needed
for compilation.
Version 3.9-1
* Added method for removeNodes for XMLNodeList.
Version 3.9-0
* Enabled additional encoding for element, attribute and namespace names, and
in xmlValue().
* Corrected default value in documentation for parse in xmlSource().
Version 3.8-1
* Corrected documentation for readHTMLTable() about stringsAsFactors behaviour.
* Added parse = FALSE as parameter for xmlSource() to allow just returning the text from
each node.
Version 3.8-0
* added readSolrDoc() and readKeyValueDB() functions to read Solr and Property list documents.
Version 3.7-4
* saveXML() for XMLNode returns a character vector of length 1, i.e. a single string.
Version 3.7-3
* Allow xmlTreeParse() and xmlParse() to process content starting with a BOM.
This works when the name of a file/URL is provided, but didn't when the content
was provided directly as a string. Identified by Milan Bouchet-Valat.
* error message when XML content is not XML or a file name now puts the content at the end
for improved readability.
Version 3.7-2
* Import methods package explicitly.
Version 3.7-1
* Added an alias for the coerce method for Currency.
* Added a C routine to query if reference counting is enabled.
See tests/checkRefCounts.R.
Version 3.7-0
* Added Currency as an option for colClass in readHTMLTable to
convert strings of the form $xxx,yyy,zzz, i.e. comma-separated
and preceded by a $. (No other currency supported yet.)
* Fix for newXMLNode() that caused a seg fault if a node was specified
as the document. Thanks to Jeff Allen.
Version 3.6-2
* Changed URL in readHTMLTable() example to new page for population of
countries
* Changes to Rprintf() rather than stderr. Still some code that uses stderr
intentionally.
Version 3.6-1
* Fix bug which caused XMLInternalUnknownNode in xmlParent() for HTML documents.
* General improvements to support nodes of type XML_HTML_DOCUMENT_NODE.
* removeNodes() method for XMLNodeSet.
Version 3.6-0
* xmlParent() is an S4 generic with methods.
* xmlAncestors() has a count argument to limit the number of ancestors
returned.
* removeNodes() is generic.
* addChildren() now removes "internal" nodes from their current parent, if any.
Avoids memory corruption in XML tree.
* ADD_XMLOUTPUT_BUFFER R variable for Windows.
* Defined XMLTreeNode as an old-style class.
Version 3.5-1
* Additional workaround for libxml2 2.6.16 for printing HTML document.
* noMatchOk parameter for xpathApply.XMLInternalNode to suppress warnings about
finding no nodes when there is a namespace in the query.
* xmlNamespace<-() function and methods to allow one to set the namespace
on a node, e.g., by the namespace prefix.
* readHTMLTable() allows "factor" as an entry in colClasses.
Version 3.5-0
* Added nsDef parameter for parseXMLAndAdd().
* Minor addition to readHTMLTable() methods to handle malformed HTML
with all the tr nodes in the thead.
Version 3.4-3
* Set default of append parameter in xmlChildren<-() method for non-internal nodes
to FALSE so that we replace the existing nodes.
Version 3.4-2
Version 3.4-1
* Typo in C code for method for xmlClone().
* Minor fixes for formatting of 2 help/Rd files.
* Removed definition of XPathNodeSet which is never used here but redefined in Sxslt.
* Fix when adding a default namespace to a node in an HTML document.
* Fix when adding a default namespace to a node in an HTML document.
Version 3.4-0
* Added xmlSearchNs() to aid looking for XML definitions by URL or prefix.
* Support in readHTMLTable() for identifying values formatted as percents
or numbers with commas. Use the classes FormattedInteger, FormattedNumber
and Percent in colClasses.
Version 3.3-2
* Better handling of namespace definitions and uses in newXMLNode
and separation of internal code into a separate function.
Version 3.3-1
* Configuration to conditionally compile code and export functions
for removing finalizers. This relies on C routines that will be
added to the base R distribution, so not present in any released
version of R as yet.
Version 3.3-0
* addFinalizer added as parameter to many functions and methods that
can return a reference to an internal/C-level node. This controls
whether a finalizer is added to the node and reference counting
is performed. See MemoryManagement.pdf/.html for more details.
* One can set the suppressXMLNamespaceWarning as either an XML option (via setOption())
or as a regular R option (via options(suppressXMLNamespaceWarning = ...) )
* Added methods for docName() for XMLHashTreeNode and XMLNode.
* added docName when converting from an internal tree to an XMLHashTree.
* xmlHashTree() uses an environment with no parent environment, by default.
* Added an append parameter to addChildren().
* Fixed coercion from XMLInternalNode to XMLNode.
* Made the methods (e.g. xmlAttrs<-(), xmlParent(), ...)
for XMLNode and XMLInternalNode consistent.
* Made classes agree for xmlParse() and newXMLDoc()
* fixed corner/end cases for getSibling for XMLHashTreeNode
* Added xmlRoot<- methods for XMLInternalDocument and XMLHashTree.
* Minor enhancement to xmlToDataFrame() so that one can pass
the value from getNodeSet() directly as the first argument to xmlToDataFrame()
without passing it via the nodes parameter.
* Registered all of the native routines being invoked via .Call().
Version 3.2-1
* Turn reference counting on by default again.
Version 3.2-0
* Change to reference to normalizePath() which was moved from utils to base in R-devel/R-2.13
Version 3.1-1
* Minor change in readHTMLTable method to identify table header better.
Version 3.1-0
* Method for [[ for internal element nodes that is much faster (by avoiding
creating the list of children and then indexing that R list).
Thanks to Stavros Mackracis for raising the issue.
Version 3.0-0
* This is not a major release, but an incremental numbering from 2.9-0 to 3.0-0, but with
one potentially significant change related to creating nodes. newXMLNode() now uses
the namespace of the parent node if the namespace argument is not specified.
* Refinements to improve the garbage collection and reference counting on internal nodes.
Version 2.9-0
* xmlAttrs(, TRUE) for internal nodes returns the URL of each namespace definition
in the names of the attr(, "namespaces") vector.
* Added parseXMLAndAdd() to parse XML from a string text and
add the nodes to a parent node. This facilitates creating
a large number of quite regular nodes using string processing
techniques (e.g. sprintf(), paste())
* xmlEventParse() with branches now has garbage collecting activated.
Version 2.8-1
* Filled in missing documentation
* Added missing init = TRUE for the parameters in one of the methods for xmlSource().
Version 2.8-0
* xmlClone() puts the original S3 classes on the new object.
* Trivial fix to readHTMLTable() to get the header when the table header is inside
a tbody.
* Garbage collection/Memory management re-enabled.
Version 2.7-0
* compareXMLDocs() function
* Added xmlSourceFunctions() and xmlSourceSection()
* Support in saveXML() for XMLInternalDocument for the prefix parameter.
* saveXML() and related methods can deal with NULL pointers in
XMLInternalDocument objects.
* fixed bug in catalogAdd().
* docName() made an S4 generic with S4 methods (rather than S3 methods).
* added catalogDump()
* readHTMLTable() puts sensible names on the data frames if there is no header for the table.
Version 2.6-0
* When copying a node from one document to another, the node is explicitly
copied and not removed from the original document. This also fixes a problem
with the name space not being on the resulting node.
* New functions for converting simple, shallow XML structure to an R data frame.
xmlToDataFrame() & xmlToList()
* addChildren() can handle _copying_ a node from a different document.
* as()/coerce() method for URI to character.
* New functions to convert an XML tree to an S4 object and also to infer
S4 class definitions from XML. (makeClassTemplate(), xmlToS4())
* Minor change to C code for compilation on Solaris and Sun Studio
Version 2.5-3
* Trivial change to an Rd file to add an omitted
Version 2.5-2
* Configuration enhanced to handle very old (but standard on OS X) versions of libxml which do not have
the xmlHasFeature() routine.
People with such an old version of libxml (i.e. 2.6.16) should consider upgrading. That is 5 years old.
Version 2.5-1
* Added a configuration check and compile time condition for the presence of XML_WITH_ZLIB. This
allows installation with older versions of libxml2 such as 2.6.26.
* Moved some old S3 classes to S4 class definitions to deal with recent changes to the methods package.
Version 2.5-0
* Added xmlParseDoc() and parser option constants. These allow one to parse a document
from a file, URL or string and specify any combination of 20 different options controlling
the parser, e.g. whether to replace entities, perform XInclude, add start and end XInclude nodes,
expand entities, load external DTDs, recover when there are errors.
* Added libxmlFeatures() to dynamically determine which features were compiled into the version
of libxml2.
* newXMLNode() has a new argument sibling which is used to add the new node as the sibling of this
node. The parameter 'at' is used as the value for the 'after' parameter in addSibling().
* saveXML() is now an S4 generic. (Changes in other packages, e.g. Sxslt, RXMLHelp.)
* Added readHTMLTable() which is a reasonably robust and flexible way to read HTML tables.
* Added runTime parameter for libxmlVersion() so we can get compile and run time version information.
Version 2.4-0
* Significant change to garbage collection facilities for internal/C-level nodes.
This works hard to ensure that XMLInternalDocument objects and XMLInternalNode objects
in R remain valid even when their "parent" container is released in R. See memory.pdf.
This can be disabled with configuration argument --enable-nodegc=no.
* Configuration option to compile with xmlsec1 (or xmlsec1-openssl). More to come on support for this.
Version 2.3-0
* Added getLineNumber() to be able to determine the line number of an XML node within
its original document.
* xmlApply() and xmlSApply() have a parameter to ignore the XInclude start and end nodes.
* xmlChildren() also have an omitNodeTypes parameter and by default exclude XInclude nodes.
* Added ensureNamespace() to add a namespace definition(s) if necessary.
Version 2.2-1
* source() method equivalent to xmlSource() and appropriate installation
changes for older versions of R ( < 2.8.0).
Version 2.2-0
* Added xmlClone() and findXInclude() functions.
* [Important] Bug fix regarding the error handling function for XML and HTML parsing.
Uncovered by Roger Koenker. This manifested itself in R errors of the form
"attempt to apply non-function".
Version 1.99-1
* addChildren() unconditionally unlinks nodes that already have a parent.
* Typo bug in removeChildren.XMLNode code found and fixed by Kate Mullen.
Version 1.99-0
* Added recursive parameter to xmlValue() function to control whether to work on just the
immediate nodes or also children.
* Correction for xpathSApply() when returning an array/matrix which referred to a non-existent variable.
* Faster creation of internal nodes via newXMLNode().
* xmlRoot() for XMLHashTree works for empty trees.
* Added xmlValue<-() function.
* Fix for removeAttributes() with namespaces.
* Addition to configure script of the argument --with-xml-output-buffer to force
whether to compile and use our own "local" version of xmlOutputBufferCreateBuffer()
which is needed on unusual systems. Supplied by Jim Bullard (UC Berkeley).
Version 1.98-1
* Deal with older S3-style classes with inheritance for 2.7.2 differently from the 2.8.0
mechanism.
* Changes to catch more cases of xmlChar * being treated as char * which causes the Sun compiler to
fail to compile DocParse.c
* Export class XMLNamespaceDefinitions which caused problems in the code in the caMassClass package.
Version 1.98-0
* The function XML:::xpathSubNodeApply() is the implementation of xpathApply() for an XMLInternalNode
from earlier versions of the package and which explicitly moves the node to a new document and performs
the XPath query and then re-parents the node. Instead of using this, users can use xpathApply()/getNodeSet()
and simply change the XPath expression to be prefixed with ., e.g. instead of //tr, use .//tr to root the
XPath query at the current node.
* Minor patch to configure.in to allow for libxml2-2.7.*.
* saveXML() for XMLInternalDocument now uses xmlDocFormatDump() rather than xmlSaveFile()
and so formatting is "better".
* The [ and [[ operators for XMLInternalDocument support a 'namespaces' parameter
for ease of extracting nodes. This is syntactic sugar for getNodeSet()/xpathApply().
* xmlParse() and htmlParse() return internal documents and nodes by default and are easier to type.
The results are amenable to XPath queries and so these are the most flexible representations.
* xmlRoot() has a skip argument that controls whether to ignore comment and DTD nodes.
The default is TRUE.
* Additional functionality for XMLHashTree and XMLHashTreeNode, including facilities for creating nodes
while adding them to the tree, copying sub-trees/nodes to separate trees.
* Functionality to convert from an XMLInternalNode to an XMLHashTree - as(node, "XMLHashTree").
This is also an option in xmlTreeParse(, useHashTree = TRUE/FALSE)
[or xmlTreeParse(, treeType = "hashTree")]
* Branch nodes from xmlEventParse(, branches = list(...)) are now garbage collected appropriately.
* xmlAttrs.XMLInternalNode now does not add the namespace prefix to the name of the attribute,
by default. Use xmlAttrs(node, addNamespace = TRUE) to get old behaviour.
* xmlGetAttr() has a corresponding new parameter addNamespace that is passed through to the call to
xmlAttrs().
* getRelativeURL() function available for getting URI of a document from a given attribute
relative to a base URL, e.g. an HTML or a .
* xmlAttrs<- methods support an append (TRUE by default) to add values to the existing attributes,
or to replace the existing ones with the right-hand side of the assignment.
* xmlAttrs<- checks for namespaces in all the ancestors for XMLInternalNode and XMLHashTreeNode.
* Introduced the class XMLAbstractNode which is the parent for the XMLNode, XMLInternalNode and
XMLHashTreeNode, which allows high-level methods that use the API to access the elements of the nodes
to be defined for a single type.
* Changed name of XMLNameSpace class to XMLNamespace (lower-case 's').
Version 1.97-1
* Fix for configuration in detecting existence of encoding
enumerations in R. So now encoding of strings is working again.
Version 1.97-0
* Added xmlNativeTreeParse() as an alias for xmlInternalTreeParse()
and xmlTreeParse(, useInternalNodes = TRUE).
* Assignment to attributes of an R-level XML node works again, e.g.
xmlAttrs(doc[[3]][[2]])['foo'] = "bar"
* Subsetting ([[) for XMLHashNode behaves correctly.
* Added .children parameter to addTag() function in xmlOutputDOM() objects.
* Thanks to Michael Lawrence, a significantly simpler and more
general mechanism is used for getNodeSet()/xpathApply() when
applied to a node and not a document. This allows xpath queries
that go back up the ancestor path for the node.
Version 1.96-0
* Functionality for working with XML Schema now incorporated.
* xmlSchemaValidate() function for validating a document against a schema.
* xmlSchemaValidate() using structured error handlers to give
information about line numbers, columns, domain, etc. as well as
the message.
* xmlChildren() method for XMLInternalDocument
* Recognize additional internal node types,
e.g. XMLXIncludeStartNode, ...
* foo.dtd example now uses internal and external entities for illustration.
Version 1.95-3
* configuration change to support older versions of R that do not
have the C enumeration type cetype_t defined in Rinternals.h.
Version 1.95-2
* Fix for xpathApply()/getNodeSet() on the top-level node of a document
which left the original document with no children! Found by Martin Morgan.
Version 1.95-1
* Minor bug fixes regarding Encoding issues introduced in 1.95-0.
* xmlEventParse() calls R_CheckUserInterrupt() when making callbacks to R functions
and so should make the GUI more responsive.
* Test for older versions of libxml2 which did not have a context field in the xmlNs
data structure.
Version 1.95-0
* Use the encoding of the document in creating R character strings to identify
the Encoding() in R. There are probably omissions and potential problems, so
I would be very grateful for examples which fail, along with the file, the locale
and the R code used to manipulate these.
Version 1.94-0
* Fixed a bug in xpathApply()/getNodeSet() applied to an XMLInternalNode
which now ensures that the nodes emerge with the original internal document
as their top-level document.
* Added processXInclude() for processing individual XInclude nodes
and determining what nodes they add.
* If asText is TRUE in xmlTreeParse(), xmlInternalTreeParse(), ...,
no call to file.exists() is made. This is both sensible and
overcomes a potential file name length limitation (at least on
Windows).
* The trim parameter for xmlInternalTreeParse() and
xmlTreeParse(, useInternal = TRUE) causes simple text nodes
containing blank space to be discarded. saveXML() will, by
default, put them back but not if text nodes are explicitly added.
* xmlTreeParse(), xmlInternalTreeParse(), htmlTreeParse(),
parseDTD(), etc. take an error handler function which defaults to
collecting all the errors and reporting them at the end of the
attempt to parse.
* getXMLErrors() returns a list of errors from the XML/HTML parser
for help in correcting documents.
* Added xmlStopParser() which can be used to terminate a parser from
R. This is useful in handler functions for SAX-style parsing via
xmlEventParse().
* A handler function passed to xmlEventParse() can indicate that it
wants to be passed a reference to the internal xmlParserContext by
having the class XMLParserContextFunction. Such functions will be
called with the context object as the first argument and the usual
arguments displaced by 1, e.g. the name and attributes for a
startElement handler would then be in positions 2 and 3.
* When parsing with useInternalNodes= TRUE and trim = TRUE in
xmlTreeParse() or xmlInternalTreeParse(), blank nodes are discarded
so line breaks between nodes are not returned as part of the tree.
This makes pretty-printing/indenting work on the resulting
document but does not return the exact content of the original
XML. Use trim = FALSE to preserve the breaks.
* Added xmlInternalTreeParse() which is a simple copy of xmlTreeParse()
with useInternalNodes defaulting to TRUE, so we get an internal C-level tree.
* Added an xpathSApply() function that simplifies the result to a
vector/matrix, if possible.
* Added replaceNode() function which allows one to replace an internal node
with another one.
* addChildren() has a new at parameter to specify where in the list
of children to add the new nodes.
* newXMLNode(), etc. can compute the document (doc argument) from
the parent.
* The subset operator applied to an XMLInternalDocument and
getNodeSubset() and xpathApply() compute the namespaces from the
top-level of the document by default, so, e.g., doc[["//r:init"]] works.
* section parameter added to xmlSource() to allow easy subsetting to
a particular section within a document.
* added catalogLoad(), catalogAdd(), catalogClearTable() functions.
* Added docName() function for querying the file name or URL of a
parsed XML document.
* RS_XML_createDocFromNode() C routine adds root node
correctly via xmlAddChild().
* Slightly improved identification of HTML content rather than a file or URL name.
* Added a simplify parameter to the xmlNamespaceDefinitions()
function which, if TRUE, returns a character vector giving the
prefix = URI pairs which can be used directly in xpathApply() and
getNodeSet().
Version 1.93-1
* Method for xmlNamespace with a character is now exported! Needed for cases that arise in
SSOAP.
Version 1.93-0
* The closeTag() function within an XMLInternalDOM object returned by xmlTree() provides
support for closing nodes by name or position in the stack of open nodes.
* xmlRoot() method for an XMLInternalDOM tree.
* Added a parent argument to the constructor functions for internal nodes, e.g. newXMLNode,
newXMLPINode, newXMLCDataNode, etc.
* doc argument for the constructor functions for internal nodes is now moved from second to third.
Calling
* Potentially changed the details about creating XML documents and nodes with namespaces. If these
negatively affect your code, please send me email (duncan@wald.ucdavis.edu).
* Enhancements and fixes for creating XML nodes and trees, especially with name spaces.
* Many minor changes to catch special cases in working with internal nodes.
Version 1.92-1
* Make addNode()/addTag() in XMLInternalDOM work with previously created XML nodes via newXMLNode().
Thanks to Seth Falcon for pointing out this omission. More improvements in the pipeline for generating
XML.
* addChildren for an XMLInternalNode can be given a list of XMLInternalNodes and/or character strings.
* xmlSource() handles r:codeIds better.
Version 1.92-0
* Added removeNodes function for unlinking XMLInternalNode objects directly by reference.
* xmlRoot() handles empty documents.
* Documentation cleanups.
Version 1.91-1
* Remove output about "cleaning"/releasing an internal document pointer.
* The warning from getNodeSet/xpathApply about using a prefix for the default namespace
now has a class/type of condition, specifically "XPathDefaultNamespace".
Version 1.91-0
* argument to add a finalizer for an XMLInternalDocument in xmlTreeParse()/htmlTreeParse() when
useInternalNodes = TRUE. If this is set, automatic garbage collection is done which will free
any sub-nodes. If you want to work with any of these nodes after the top-level tree variable
has been released, specify addFinalizer = FALSE and explicitly free the document yourself with the
free() function.
* Some improvements on namespace prefixes in internal nodes. See newXMLNode().
* classes for additional XMLInternalNodes (e.g. XMLInternalCDataNode) now exported
* removeAttributes() has a .all argument to easily remove all the attributes within a node.
Supported for both R and internal style nodes.
* xmlAttrs<-() function for simply appending attributes to a node.
* If xmlTreeParse() is called with asText = FALSE and the file is not found, an error of class
"FileNotFound" is raised.
* [[ operator for XMLInternalDocument to get the first/only entry in
the node set from an XPath query. This is a convenience
mechanism for accessing the element when there is only one.
Version 1.9-0
* Added xmlAncestors() function for finding chain of parent nodes, and optionally applying a
function to each.
* xmlDoc() allows one to create a new XML document by copying an existing internal node, allowing
for work with sub-trees as regular documents, e.g. XPath queries restricted to a subset of the
tree.
* Ability to do XPath searches on sub-nodes within a document. getNodeSet() and xpathApply()
can now operate on an XMLInternalNode by creating a copy of the node and its sub-nodes into a
new document. However, there is a memory leak associated with this and you should use xmlDoc()
to create a new document from the node and then perform the XPath query on that and free the
document.
Version 1.8-0
* Added xinclude argument to xmlTreeParse() and htmlTreeParse() to control whether
XInclude nodes should be resolved and
the appropriate nodes inserted and the actual node discarded.
* The namespaces argument of getNodeSet() (and implicitly of the [ method for an
XMLInternalDocument object) can be a simple prefix name when referring to the
default namespace of the document, e.g.
getNodeSet(doc, "/r:help/r:keyword", "r")
when the document has a default namespace.
* Added a 'recursive = FALSE' parameter to xmlNamespaceDefinitions() to be able to
process all descendant nodes and so fetch the namespace definitions in an entire
sub-tree. This can be used as input to getNodeSet(), for example.
* as() method for converting an XMLInternalDocument to a node.
* xmlNamespaceDefinitions() handles the case where the top-level element
is not the first node, e.g. when there is a DOCTYPE node and/or a comment.
Version 1.7-3
* addChildren() coerces a string to an internal text node before adding the child.
Version 1.7-2
* Trivial error in free() for XMLInternalDocument objects fixed so the memory is released.
Version 1.7-1
* addition to configuration to detect whether the checked field of the xmlEntity structure is present.
Version 1.7-0
This is a quite comprehensive enhancement to the facilities in the XML package. A lot of work on
the tools for creating or authoring XML from within R were added and improved. Using internal
nodes directly with newXMLNode() and friends, or using xmlTree() is probably the simplest.
But xmlHashTree() creates them in R.
* IMPORTANT: one can and should use the names .comment, .startElement, .processingInstruction,
.text, etc. when identifying general element handlers that apply to all elements of a particular type
in an XML document rather than to nodes that have a particular name. This differentiates between
a handler for a node named, say, text and a handler for all text elements found in the document.
To use this new approach, call xmlTreeParse() or xmlEventParse() with
useDotNames = TRUE
This will become the default in future releases.
* namespaceHandlers() function provided to deal with node handler functions with XML name spaces where
there may be multiple handlers for the same node name but which are in different XML name spaces.
* signature for entityDeclaration function in SAX interface is changed so that the second argument
identifies the type of entity. Also, to query the value of an entity, the C code calls the
getEntity() method of the handlers.
* addChildren() & removeChildren() and addAttributes() & removeAttributes() for an existing node allows for
post-creation modification of an XML node.
* Improved support for name spaces on node attributes.
* xmlName<-() methods for internal and R-level XML nodes to change the name of a node.
* saveXML() and as(, "character") method for XMLInternalNode objects now to create a text representation of the
internal nodes.
* xmlTree() allows for creating a top-level node in the call to xmlTree() directly and does not
ignore these arguments.
* DTD and associated DOCTYPE can be created separately or directly in xmlTree().
* xmlTree() now allows the caller to specify the doc object as an argument, including NULL
for when the nodes do not need to have a document object.
* Better support in xmlTree() for namespaces and maintaining a default/active namespace prefix that is to be
inserted on each subsequent node.
* new functions for creating different internal node types - newXMLCDataNode, newXMLPINode, newXMLCommentNode, newXMLDTDNode.
* newXMLNode() handles text, using the new newXMLTextNode() and coerce methods.
* xmlTree() supports an active/default name space prefix which is used for new nodes.
* Resetting the state of the xmlSubstituteEntities variable is handled correctly in the case of an error.
Version 1.6-4
* xmlSize() method for an XMLInternalNode.
Version 1.6-3
* Handle change from Sys.putenv() to Sys.setenv().
Version 1.6-2
* Added a URI (old) class label to the result of parseURI, and exported that class for use in
other packages (specifically SSOAP, at present).
* For subsetting child nodes by name, there is a new all = FALSE parameter which allows the caller
to get the first element(s) that matches the name(s), or all of them with, e.g.
node["bob", all = TRUE]. This allows us to avoid the equivalent idiom
node[ names(node) == "bob" ]
which is complicated when node is the result of an inline computation.
* added method for setting names on an XMLNode (names<-.XMLNode), not just for retrieving them.
Version 1.6-1
* Added catalogResolve() function for looking up local files and aliases for URIs, and
PUBLIC and SYSTEM identifiers, e.g. in DOCTYPE nodes.
* saveXML method added for XMLFlatTree. (Identified by Alberto Monteiro.)
* Fixed saveXML methods for various classes.
* Doctype class: added validity method, improved coercion to character, and slightly more flexible
constructor function. Validates PUBLIC identifier.
Version 1.6-0
* In saveXML() method for XMLInternalDocument, we "support" the encoding argument by passing it to
xmlDocDumpFormatMemoryEnc() or xmlSaveFileEnc() in the libxml2 C code.
We could also use the xmlSave() API of libxml2.
* htmlTreeParse() supports an encoding argument, e.g. htmlTreeParse("9003.html", encoding = "UTF-8").
This allows one to correctly process HTML documents that do not contain their encoding information in the
<meta> tag.
The argument is also present in xmlTreeParse() but currently ignored.
Version 1.5-1
* updated documentation for the alias for free method for XMLInternalDocument.
Version 1.5-0
* added free() generic function and method for XMLInternalDocument
Version 1.4-2
* xmlTreeParse and htmlTreeParse will accept a character vector of length > 1
and treat it as the contents of the XML stream and so call
paste(file, collapse = "\n") before parsing. The asText = TRUE is implied.
Thanks to Ingo Feinerer for prompting this addition.
Version 1.4-1
* Fix to ensure a connection is closed in saveXML. Identified by Herve Pages
* Update definition and documentation for xmlAttrs to take ... arguments.
Version 1.4-0
* Added fullNamespaceInfo parameter for xmlTreeParse() which, if TRUE,
provides the namespace for each node as a named character vector giving
the URI of the namespace and the prefix as the element name, i.e. c(prefix = uri)
The default is FALSE to preserve the earlier behavior. The namespace object
has a class XMLNamespacePrefix for the old-style, and XMLNamespace for the new
style with c(name = uri) form.
This information makes comparing namespaces a lot simpler, e.g. in SOAP.
Version 1.3-2
Mainly fixes for internal nodes.
* Export XMLNode, XMLInternalNode, XMLInternalElementNode classes
* as() method for XMLInternalNode wasn't recognized properly because
the classes weren't exported.
Also, the internal function asRXMLNode() accepts trim and ignoreBlanks
arguments for cleaning up the XML node text elements that are created.
* export coerce methods.
Version 1.3-1
* parseURI() sets the port to NA if the value is 0.
Version 1.3-0
* The SAX parser now has a branches argument that identifies XML elements
which are to be built into (internal) nodes and then the sub-tree/node
is passed to the handler function specified in the element of the branches
argument. This mixes the efficient SAX event-driven parsing with the easier
programming tree-based model, i.e. DOM.
* XMLInternalNode objects in R now have extra class information identifying them as
a regular element, text, CDATA, PI, ...
Version 1.2-0
* names() method for XMLInternalNode
* [ method for XMLInternalDocument and string using XPath notation.
* getNodeSet() has support for default namespaces in the XML document.
It is available, by default, to the XPath expression with the prefix 'd'.
* Exported xmlNamespace() method for XMLInternalNode.
* xmlNamespaceDefinitions() made generic (S3) and new method for
XMLInternalNode class.
Version 1.1-1
* Change to handling entities in printing of regular R-level XML text nodes
created during xmlTreeParse() call. Identified by Ingo Feinerer.
* saveXML for an XMLNode object will take a file name and write to the corresponding
file, overwriting it if it already exists.
Version 1.1-0
* xpathApply and getNodeSet take functions to be applied to nodes in a node
set resulting from an XPath query.
Version 1.0-0
* Version skipped as it is not a milestone release, just ran out of numbers!
Version 0.99-94
Changes from Russell Almond and suggestions from Franck Giolat for creating XML in R
* xmlNode() puts the names on children if omitted. Caller can use names other
than the XML element name (but this is not necessarily advisable).
* Added xmlChildren() method to set the children.
* Printing of an XML node to the console handles empty nodes and text nodes better.
* xmlTextNode() will replace reserved characters with their entity equivalent, e.g.
& with &amp; and < with &lt;. One can specify the entity vector including providing
an empty one should one want to avoid replacement.
Version 0.99-93
Changes from Martin Morgan
* import normalizePath from utils.
* Changes to configure.win to find 3rd party DLLs in bin/ directory, not lib/
Version 0.99-92
* Fix for setting DTD entity field uncovered by the strict type checking in R internals.
Version 0.99-91
* Added an encoding argument to saveXML(), initially for use in the Sxslt package.
Version 0.99-9
* Example of using namespaces in getNodeSet()
* Examples for xmlHashTree().
Version 0.99-8
* Introduced initial version of flat trees for storing the DOM in a
non-hierarchical data structure in R. This allows us to work with
a mutable tree and to perform certain operations across all the
nodes more efficiently, i.e. non-recursively. Importantly, one
can find the parent node of a given node in the tree which is not
possible with the list of list approach. It does mean more
computation for some common operations, specifically parsing.
Indeed, it can be 25 times slower for a non-trivial file, i.e. one
with. However, for a file with 7700 nodes, it still only takes 2
1/2 seconds. So there is a trade-off. While there are a few
versions in the code, xmlHashTree() is the one to use for speed
reasons. xmlFlatListTree() is another and xmlFlatTree() is
excruciatingly slow. See tests/timings.R for some comparisons.
xmlGetElementsByTagName and other facilities work on these types
of trees.
More functions and methods can and should be provided to work with
these trees if they turn out to be used in any significant way.
* add the R attribute 'namespaces' to an XML node's attributes
vector so that one can differentiate between conflicting attribute
names with different namespaces.
* added parseURI() to return the elements of a URI from a string.
Version 0.99-7
* Example of reading HTML tables using XPath and internal nodes in bondsTables.R
* Some additional methods for XMLInternalNode.
Version 0.99-6
* configure does not require the GNU sed, but can use any version of sed now that the
use of + in the regular expression has been removed.
Version 0.99-5
* Added append.XMLNode and append.xmlNode to the exported symbols from the NAMESPACE
file.
Version 0.99-4
* Fix for addComment() in xmlOutputDOM().
* Removed all the compilation warnings about interchanging xmlChar* and char*.
Version 0.99-3
* Added support in print methods for XML objects for indent = FALSE,
and tagSeparator, which defaults to "\n". These can be used to print
a faithful representation of an original XML document, but only when
used in combination with
xmlTreeParse( skipBlanks = FALSE, trim = FALSE)
Version 0.99-2
* Problems compiling with libxml2-2.5.11 and libxml2-2.6.{1,2}, so
we now test for a recent version of libxml. The test uses sed -r
which may cause problems. If one really wants to avoid the tests
set the environment variable FORCE_XML2 to any value before running
R CMD INSTALL XML.
* Documentation for getNodeSet() didn't refer to the new namespaces argument.
Version 0.99-1
* getNodeSet() takes a namespaces argument which is named character vector of
prefix = URI pairs of namespaces used in the XPath expression.
* Handlers for xmlEventParse() can include startDocument and endDocument elements
to catch those particular events. Useful for closing connections and general cleanup,
especially in the "pull" data source, i.e. connections or functions.
* xmlEventParse() when called with a function as the data source now doesn't have
a new line appended to each string returned to the parser by the function.
* Passing a connection to xmlEventParse() now uses a regular R function to call
readLines(con, 1) and no longer does this via C code to call readLines().
* Fix to the example in xmlEventParse() using the state variable.
Version 0.99-0
* Implementation for the endElement in the xmlEventParse() for saxVersion == 2.
* In xmlEventParse( , saxVersion = 2), the namespaces come as a named vector
in the fourth argument.
Version 0.98-1
* Messages from errors are now more informative. Using saxVersion = 2 in xmlEventParse(), you
get the line and column information about the error.
Version 0.98
* Added saxVersion parameter to xmlEventParse() to control which interface is used at the C level.
This changes the arguments to the startElement handler, adding the namespace for the
element.
* Added xmlValidity() function to set the value of the default validity action. This allows us to do the
setting in the R code. This is currently not exported.
* Added recursive parameter to xmlElementsByTagName() function. This provides functionality
similar to getElementsByTagName() in XML parsing APIs for other languages.
* xmlTreeParse() called with no handlers and useInternalNodes returns a reference to the
C-level xmlDocPtr instance. This is an object of class "XMLInternalDocument". This can be
used in much the same way as the regular "XMLDocument" tree returned by xmlTreeParse,
e.g. xmlRoot, etc.
* Added getNodeSet() to evaluate XPath expressions on an XMLInternalDocument object.
* Added a validate parameter to the xmlEventParse() function.
Version 0.97-8
* Fix error where CDATA nodes and potentially other types of nodes (without element names) were being
omitted from the R tree in a simple call to xmlTreeParse("filename") (i.e. with no handlers).
Version 0.97-7
* Documentation updates.
Version 0.97-6
* useInternalNodes added to xmlTreeParse() and htmlTreeParse().
This allows one to avoid the overhead of converting the contents of nodes to
R objects for each handler function call. Also, can access parents, siblings,
etc. from within a handler function.
* Included parameterizations for Windows from Uwe Ligges to aid automated-building
and finding the libxml DLL at run time.
Version 0.97-5
* Methods for accessing component of XMLInternalDocument and XMLInternalNode objects,
e.g. xmlName, xmlNamespace, xmlAttrs, xmlChildren
* saveXML.XMLInternalDOM now supports specification of a Doctype (see Doctype).
* saveXML uses NextMethod and arguments are transferred. Identified by Vincent Carey.
* Suppress warnings from R CMD check.
* Change of the output file in saveXML() example to avoid conflict with Microsoft
Windows use of name con.xml.
Version 0.97-4
* Quote URI values in namespace definitions in print.XMLNode.
Version 0.97-3
* Added a method for xmlRoot for HTMLDocument
* Changed the maintainer email address.
Version 0.97-2
* Added cdata to the collection of functions that are used in the handlers
for xmlEventParse(). Omission identified by Jeff Gentry.
* Fixed the maintainer email address to duncan@wald.ucdavis.edu
Version 0.97-1
* Put the correct S3method declarations in the NAMESPACE.
Version 0.97-0
* Using a NAMESPACE for the package
Version 0.96-0
* Using libxml2 by default rather than libxml.
* Fixed typo. in PACKAGE when initializing the library.
Version 0.95-7
* When creating a namespace identifier, if the namespace doesn't have an href, then we put
in an empty string.
Version 0.95-6
* Documentation updates for synchronization with the code.
Version 0.95-5
* Trivial bug of including extra arguments in call to UseMethod for
dtdElementValidEntry that generated warnings.
Version 0.95-4
* Configuration now tries to find libxml 1, then libxml 2 unless explicitly
instructed to find libxml 2 via --with-libxml2. So the change is to pick
up libxml 2 if libxml 1 is not found rather than signal an error.
Version 0.95-3
* Remove the need to define xmlParserError. Instead, set the value of the error
routine/function pointer to our error handler in the different default handlers
in libxml. We now initialize these default objects when we load the library.
* When setting the environment variables LIBXML_INCDIR and LIBXML_LIBDIR, one
needs to specify the -I and -L prefixes for the compiler and linker respectively
in front of directory names.
* Detect whether the routine for xmlHashScan (in libxml2) provides a return value
or not. This changed in version 2.4.21 of libxml2.
Version 0.95-2
* Configuration detects Darwin and handles multiplicity of xmlParserError
symbol.
Version 0.95-1
* Configuration now supports the specification of the xml-config script
to use via the environment variable XML_CONFIG or the --with-xml-config
as in --with-xml-config=xml2-config
* Recognize file:/// prefix as URL and not switch to treating file name as
XML text.
Version 0.95-0
* Event-driven parsing (SAX) can take a connection object or a function
that is called when the parser needs more input. See the documentation
for xmlEventParse().
* Classes and methods explicitly created during the installation.
This will cause problems with namespaces until the saving of the image
model works with namespaces.
Version 0.94-1
* Minor change to configuration script to avoid -L-L in specification of
directory for XML library (libxml).
Version 0.94-0
* Use registration of C routines
* Added methods for saveXML for XMLNode and XMLOutputStream objects.
Version 0.93-4
* replaceEntities argument for xmlEventParse.
* S4 SAX methods assigned to the correct database.
Version 0.93-3
* Correct support for DTDs and namespaces in the internal nodes
used in xmlTree(). Errors identified by Vincent Carey.
Version 0.93-2
* Bug in trimming white space discovered by Ott Toomet.
Version 0.93-1
* Documentation updates. Included xmlGetAttr.Rd.
Version 0.93-0
* Added toString.XMLNode
* Fixed the printing of degenerate namespaces in an XML node,
i.e. the spurious `:'.
Version 0.92-2
* Fixed C bug caused by using namespace without a suffix,
e.g. xmlns="http:...." assumed prefix was present.
Thanks to David Meyer.
Version 0.92-1
* Display the namespace definitions when printing an XMLNode object.
* New addAttributeNamespaces argument for xmlTreeParse() that controls whether
namespaces are included in attribute names.
Version 0.92-0
* XMLNode class now contains a field for namespace definitions
The `namespace' field is a character string identifying the prefix's
namespace. The `namespaceDefinition' field contains the full definitions
of each of the namespaces defined within a node.
* Printing of XML nodes displays the namespace.
* xmlName() takes a `full' argument that controls whether the
namespace prefix is prepended to the tag name.
Version 0.91-0
* Added a mechanism to the SAX parser to allow a state object
to be passed between the callbacks and returned as the result of
the parsing. This avoids the need for closures. Also, works
with S4 classes and the genericSAXHandlers() methods by allowing
one to write methods for these generic callbacks that dispatch
based on the type of the state object.
* Fix to make work properly with S4 class system.
Version 0.9-1
* Formatting of the help files to avoid long lines
identified by Ott Toomet
* Addition of `ignoreComments' argument for xmlValue()
* Date in the DESCRIPTION file corrected (thanks to Doug Bates).
Version 0.9-0
* Added addCData() and addPI() to the handlers of the different
XMLOutputStream classes.
Code for XMLInternalDOM (i.e. xmlTree()) from Byron Ellis.
* print() method for XMLProcessingInstruction node has the terminating `?'
as in <?target data?>.
Version 0.8-2
* Changes to support libxml2-2.4.21 (specifically the issues with
the headers and parse error regarding xmlValidCtxt). Thanks to
Wolfgang Huber for identifying this.
* Ignoring R_VERSION now, so dependency is R >= 1.2.0
Version 0.8-1
* Added an `attrs' argument to the xmlOutputBuffer and xmlTree
functions for specifying the top-level node.
Version 0.8-0
* xmlValue() extended to work recursively if a node has
only one child.
* T and F replaced by TRUE and FALSE
Version 0.7-4
* Support for Windows
Version 0.7-3
* Documents without are handled correctly.
* Configuration tweak to set LD_LIBRARY_PATH to handle the case
that the user specifies LIBXML_LIBDIR and it is needed to run the
version test.
* Keyword XML changed to IO.
Version 0.7-2
* Fix for printing XMLNode objects to handle comments and elements
with name "text". Identified by Andrew Schuh.
Version 0.7-1
* Minor fixes for passing R CMD check.
Version 0.7-0
* Generating XML trees using internal libxml structures:
xmlTree(), newXMLDoc(), newXMLNode(), saveXML().
* Support parsing HTML (htmlTreeParse()) using DOM.
Suggestion from Luis Torgo.
* Additional updates for libxml2, relating to DTDs.
Version 0.6-3
* Installation using --with-xml2 now attempts to link against libxml2.so
and the appropriate header files.
* Use libxml's xml-config or xml2-config scripts if these are available.
Version 0.6
* xmlDOMApply for recursively applying a function to each node in a tree.
Version 0.5-1
* simplification of xmlOutputBuffer so that it doesn't put
the namespace definition in each and every tag.
* configuration changes to support libxml2-2.3.6
(look for libxml2, check if xmlHashSize is available)
* now dropping nodes if the handler function returns NULL.
Updated documentation.
* spelling correction in the documentation
Version 0.5
* xmlOutputBuffer now accepts a connection.
* Fixes for using libxml2, specifically 2.2.12.
Also works for libxml2.2.8
* Enhanced configuration script to determine what features are available.
Version 0.4
* `namespace' handler in xmlTreeParse is called when a namespace
declaration is encountered. This is called before the child nodes
are processed.
* More documentation, in Tour.
* xmlValue, xmlApply, xmlSApply, xmlRoot, xmlNamespace, length, names
* Constructors for different types of nodes: XMLNode, XMLTextNode, XMLProcessingInstruction.
* Methods for print(), subsetting ([ and [[), accessing the fields
in an XMLNode object.
* New classes for the different node types (e.g. XMLTextNode)
* Event driven parsing available via libxml. Expat is not needed but
can be used.
* Document sources can be URLs (ftp and http) when using the libxml parser.
* Examples for processing MathML and SVG files. See examples/ directory.
* Examples for event driven parsing.
* Class of result from xmlTreeParse is XMLDocument.
* Comments, Entities, Text, etc. inherit from XMLNode
in addition to defining their own XML class.
XML/man/ 0000755 0001760 0000144 00000000000 13610406572 011521 5 ustar ripley users XML/man/xmlClone.Rd 0000644 0001760 0000144 00000003571 13607575373 013613 0 ustar ripley users \name{xmlClone}
\alias{xmlClone}
\alias{xmlClone,XMLInternalNode-method}
\alias{xmlClone,XMLInternalDocument-method}
\title{Create a copy of an internal XML document or node}
\description{
These methods allow the caller to create a copy of
an XML internal node. This is useful, for example, if we want to use
the node or document in an additional context, e.g.
put the node into another document while leaving it in the
existing document. Similarly, if we want to remove nodes to simplify
processing, we probably want to copy it so that the changes are not
reflected in the original document.
At present, the newly created object is not garbage collected.
}
\usage{
xmlClone(node, recursive = TRUE, addFinalizer = FALSE, ...)
}
\arguments{
\item{node}{the object to be cloned}
\item{recursive}{a logical value indicating whether the
entire object and all its descendants should be duplicated/cloned (\code{TRUE})
or just the top-level object (\code{FALSE})}
\item{addFinalizer}{typically a logical value indicating whether to bring this
new object under R's regular garbage collection.
This can also be a reference to a C routine which is to be used as
the finalizer. See \code{\link[base]{getNativeSymbolInfo}}.
}
\item{\dots}{additional parameters for methods}
}
\value{
A new R object representing the object.
}
\references{libxml2}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlParse}}
\code{\link{newXMLNode}}
\code{\link{newXMLDoc}}
}
\examples{
doc =
xmlParse(paste0('<doc><author id="dtl"><firstname>Duncan</firstname>',
'<surname>Temple Lang</surname></author></doc>'))
au = xmlRoot(doc)[[1]]
# make a copy
other = xmlClone(au)
# change it slightly
xmlAttrs(other) = c(id = "dtl2")
# add it to the children
addChildren(xmlRoot(doc), other)
}
\keyword{IO}
\keyword{programming}
\concept{XML}
XML/man/getHTMLLinks.Rd 0000644 0001760 0000144 00000004402 13427007651 014256 0 ustar ripley users \name{getHTMLLinks}
\alias{getHTMLLinks}
\alias{getHTMLExternalFiles}
\title{Get links or names of external files in HTML document}
\description{
These functions allow us to retrieve either the links
within an HTML document, or the collection of names of
external files referenced in an HTML document.
The external files include images, JavaScript and CSS documents.
}
\usage{
getHTMLLinks(doc, externalOnly = TRUE, xpQuery = "//a/@href",
baseURL = docName(doc), relative = FALSE)
getHTMLExternalFiles(doc, xpQuery = c("//img/@src", "//link/@href",
"//script/@href", "//embed/@src"),
baseURL = docName(doc), relative = FALSE,
asNodes = FALSE, recursive = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{doc}{the HTML document as a URL, local file name, parsed
document or an XML/HTML node}
\item{externalOnly}{a logical value that indicates whether we should
only return links to external documents and not references to
internal anchors/nodes within this document, i.e. those of the
form \code{#foo}.}
\item{xpQuery}{a vector of XPath elements which match the elements of interest}
\item{baseURL}{the URL of the container document. This is used
to resolve relative references/links.
}
\item{relative}{a logical value indicating whether to leave the
references as relative to the base URL or to expand them to their full paths.
}
\item{asNodes}{a logical value that indicates whether we want the actual
HTML/XML nodes in the document that reference external documents
or just the names of the external documents.}
\item{recursive}{a logical value that controls whether we recursively
process the external documents we find in the top-level document
examining them for their external files.}
}
\value{
\code{getHTMLLinks} returns a character vector of the links.
\code{getHTMLExternalFiles} returns a character vector.
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{getXIncludes}}
}
\examples{\donttest{ # site is flaky
getHTMLLinks("http://www.omegahat.net")
getHTMLLinks("http://www.omegahat.net/RSXML")
unique(getHTMLExternalFiles("http://www.omegahat.net"))
}}
\keyword{IO}
\keyword{programming}
XML/man/xmlSubset.Rd 0000644 0001760 0000144 00000005021 12665242441 013776 0 ustar ripley users \name{[.XMLNode}
\alias{[.XMLNode}
\alias{[[.XMLNode}
\alias{[[.XMLInternalElementNode}
\alias{[[.XMLDocumentContent}
\title{Convenience accessors for the children of XMLNode objects.}
\description{
These provide a simplified syntax for extracting the children
of an XML node.
}
\usage{
\method{[}{XMLNode}(x, ..., all = FALSE)
\method{[[}{XMLNode}(x, ...)
\method{[[}{XMLDocumentContent}(x, ...)
}
\arguments{
\item{x}{the XML node or the top-level document content in which the children are to be accessed.
The \code{XMLDocumentContent} is the container for the top-level node that also contains information
such as the URI/filename and XML version. This accessor method is merely a convenience to get
access to children of the top-level node.}
% \item{i}{index of the child of interest or the name of an XML element
% of interest. In this latter case, only the first matching element is
% returned, if any.}
\item{\dots}{the identifiers for the children to be retrieved,
given as integer indices, names, etc. in the usual format for the
generic \code{\link{[}} and \code{\link{[[}} operators}
\item{all}{logical value. When \dots is a character vector, a value
of \code{TRUE} for \code{all} means to retrieve all of the
nodes with those names rather than just the first one.
\code{FALSE} gives the usual result of subsetting a list by name
which gives just the first element.
This allows us to avoid the idiom
\code{node[ names(node) == "bob" ]}
which is complicated when node is the result of an inline
computation
and instead we use
\code{node["bob", all = TRUE]}.
}
}
\value{
A list or single element containing the
children of the XML node given by \code{x}
and identified by \dots.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.omegahat.net/RSXML}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlAttrs}}
\code{\link{[<-.XMLNode}}
\code{\link{[[<-.XMLNode}}
}
\examples{
f = system.file("exampleData", "gnumeric.xml", package = "XML")
top = xmlRoot(xmlTreeParse(f))
# Get the first RowInfo element.
top[["Sheets"]][[1]][["Rows"]][["RowInfo"]]
# Get a list containing only the first row element
top[["Sheets"]][[1]][["Rows"]]["RowInfo"]
top[["Sheets"]][[1]][["Rows"]][1]
# Get all of the RowInfo elements by position
top[["Sheets"]][[1]][["Rows"]][1:xmlSize(top[["Sheets"]][[1]][["Rows"]])]
# But more succinctly and accurately, get all of the RowInfo elements
top[["Sheets"]][[1]][["Rows"]]["RowInfo", all = TRUE]
}
\keyword{IO}
\keyword{file}
XML/man/XMLNode-class.Rd 0000644 0001760 0000144 00000003740 12030420663 014355 0 ustar ripley users \name{XMLNode-class}
\docType{class}
\alias{XMLAbstractNode-class}
\alias{XMLAbstractNode-class}
\alias{RXMLNode-class}
\alias{XMLNode-class}
\alias{XMLTreeNode-class}
\alias{XMLInternalNode-class}
\alias{XMLInternalTextNode-class}
\alias{XMLInternalElementNode-class}
\alias{XMLInternalCommentNode-class}
\alias{XMLInternalPINode-class}
\alias{XMLInternalCDataNode-class}
\alias{XMLAttributeDeclNode-class}
% Added later. Do they fit here.
\alias{XMLDocumentFragNode-class}
\alias{XMLDocumentNode-class}
\alias{XMLDocumentTypeNode-class}
\alias{XMLEntityDeclNode-class}
\alias{XMLNamespaceDeclNode-class}
\alias{XMLXIncludeStartNode-class}
\alias{XMLXIncludeEndNode-class}
\alias{XMLDTDNode-class}
\alias{coerce,XMLAbstractNode,Date-method}
\alias{coerce,XMLAbstractNode,POSIXct-method}
\alias{coerce,XMLAbstractNode,URL-method}
\alias{coerce,XMLAbstractNode,character-method}
\alias{coerce,XMLAbstractNode,integer-method}
\alias{coerce,XMLAbstractNode,logical-method}
\alias{coerce,XMLAbstractNode,numeric-method}
\alias{XMLNamespaceDefinitions-class}
\title{Classes to describe an XML node object.}
\description{These classes are intended to
represent an XML node, either directly in S or a reference to an internal
libxml node. Such nodes respond to queries about
their name, attributes, namespaces and children.
These are old-style, S3 class definitions at present.
}
\section{Slots}{
\describe{These are old-style S3 class definitions and do not have
formal slots}
}
\section{Methods}{
No methods defined with class "XMLNode" in the signature.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.xmlsoft.org}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{xmlTree}}
\code{\link{newXMLNode}}
\code{\link{xmlNode}}
}
\examples{
# An R-level XMLNode object
a <- xmlNode("arg", attrs = c(default="T"),
xmlNode("name", "foo"), xmlNode("defaultValue","1:10"))
xmlAttrs(a) = c(a = 1, b = "a string")
}
\keyword{classes}
XML/man/xmlTreeParse.Rd 0000644 0001760 0000144 00000064303 12665242441 014433 0 ustar ripley users \name{xmlTreeParse}
\alias{xmlTreeParse}
\alias{htmlTreeParse}
\alias{htmlParse}
\alias{xmlInternalTreeParse}
\alias{xmlNativeTreeParse}
\alias{xmlParse}
\alias{xmlSchemaParse}
\title{XML Parser}
\description{
Parses an XML or HTML file or string containing XML/HTML content, and generates an R
structure representing the XML/HTML tree. Use \code{htmlTreeParse} when the content is known
to be (potentially malformed) HTML.
This function has numerous parameters/options and operates quite differently
based on their values.
It can create trees in R or using internal C-level nodes, both of
which are useful in different contexts.
It can perform conversion of the nodes into R objects using
caller-specified handler functions and this can be used to
map the XML document directly into R data structures,
by-passing the conversion to an R-level tree which would then
be processed recursively or with multiple descents to extract the
information of interest.
\code{xmlParse} and \code{htmlParse} are equivalent to the
\code{xmlTreeParse} and \code{htmlTreeParse} respectively,
except they both use a default value for the \code{useInternalNodes} parameter
of \code{TRUE}, i.e. they work with and return internal
nodes/C-level nodes. These can then be searched using
XPath expressions via \code{\link{xpathApply}} and
\code{\link{getNodeSet}}.
\code{xmlSchemaParse} is a convenience function for parsing an XML schema.
}
\usage{
xmlTreeParse(file, ignoreBlanks=TRUE, handlers=NULL, replaceEntities=FALSE,
asText=FALSE, trim=TRUE, validate=FALSE, getDTD=TRUE,
isURL=FALSE, asTree = FALSE, addAttributeNamespaces = FALSE,
useInternalNodes = FALSE, isSchema = FALSE,
fullNamespaceInfo = FALSE, encoding = character(),
useDotNames = length(grep("^\\\\.", names(handlers))) > 0,
xinclude = TRUE, addFinalizer = TRUE, error = xmlErrorCumulator(),
isHTML = FALSE, options = integer(), parentFirst = FALSE)
xmlInternalTreeParse(file, ignoreBlanks=TRUE, handlers=NULL, replaceEntities=FALSE,
asText=FALSE, trim=TRUE, validate=FALSE, getDTD=TRUE,
isURL=FALSE, asTree = FALSE, addAttributeNamespaces = FALSE,
useInternalNodes = TRUE, isSchema = FALSE,
fullNamespaceInfo = FALSE, encoding = character(),
useDotNames = length(grep("^\\\\.", names(handlers))) > 0,
xinclude = TRUE, addFinalizer = TRUE, error = xmlErrorCumulator(),
isHTML = FALSE, options = integer(), parentFirst = FALSE)
xmlNativeTreeParse(file, ignoreBlanks=TRUE, handlers=NULL, replaceEntities=FALSE,
asText=FALSE, trim=TRUE, validate=FALSE, getDTD=TRUE,
isURL=FALSE, asTree = FALSE, addAttributeNamespaces = FALSE,
useInternalNodes = TRUE, isSchema = FALSE,
fullNamespaceInfo = FALSE, encoding = character(),
useDotNames = length(grep("^\\\\.", names(handlers))) > 0,
xinclude = TRUE, addFinalizer = TRUE, error = xmlErrorCumulator(),
isHTML = FALSE, options = integer(), parentFirst = FALSE)
htmlTreeParse(file, ignoreBlanks=TRUE, handlers=NULL, replaceEntities=FALSE,
asText=FALSE, trim=TRUE, validate=FALSE, getDTD=TRUE,
isURL=FALSE, asTree = FALSE, addAttributeNamespaces = FALSE,
useInternalNodes = FALSE, isSchema = FALSE,
fullNamespaceInfo = FALSE, encoding = character(),
useDotNames = length(grep("^\\\\.", names(handlers))) > 0,
xinclude = TRUE, addFinalizer = TRUE, error = htmlErrorHandler,
isHTML = TRUE, options = integer(), parentFirst = FALSE)
htmlParse(file, ignoreBlanks = TRUE, handlers = NULL, replaceEntities = FALSE,
asText = FALSE, trim = TRUE, validate = FALSE, getDTD = TRUE,
isURL = FALSE, asTree = FALSE, addAttributeNamespaces = FALSE,
useInternalNodes = TRUE, isSchema = FALSE, fullNamespaceInfo = FALSE,
encoding = character(),
useDotNames = length(grep("^\\\\.", names(handlers))) > 0,
xinclude = TRUE, addFinalizer = TRUE,
error = htmlErrorHandler, isHTML = TRUE,
options = integer(), parentFirst = FALSE)
xmlSchemaParse(file, asText = FALSE, xinclude = TRUE, error = xmlErrorCumulator())
}
\arguments{
\item{file}{ The name of the file containing the XML contents.
This can contain \~ which is expanded to the user's
home directory.
It can also be a URL. See \code{isURL}.
Additionally, the file can be compressed (gzip)
and is read directly without the user having
to de-compress (gunzip) it.}
\item{ignoreBlanks}{ logical value indicating whether
text elements made up entirely of white space should be included
in the resulting `tree'. }
\item{handlers}{Optional collection of functions
used to map the different XML nodes to R
objects. Typically, this is a named list of functions,
and a closure can be used to provide local data.
This provides a way of filtering the tree as it is being
created in R, adding or removing nodes, and generally processing
them as they are constructed in the C code.
In a recent addition to the package (version 0.99-8),
if this is specified as a single function object,
we call that function for each node (of any type) in the underlying DOM tree.
It is invoked with the new node and its parent node.
This applies to regular nodes and also comments, processing
instructions, CDATA nodes, etc. So this function must be
sufficiently general to handle them all.
}
\item{replaceEntities}{
logical value indicating whether to substitute entity references
with their text directly. This should be left as \code{FALSE}.
The text still appears as the value of the node, but there
is more information about its source, allowing the parse to be reversed
with full reference information.
}
\item{asText}{logical value indicating that the first argument,
`file',
should be treated as the XML text to parse, not the name of
a file. This allows the contents of documents to be retrieved
from different sources (e.g. HTTP servers, XML-RPC, etc.) and still
use this parser.}
\item{trim}{
whether to strip white space from the beginning and end of text strings.
}
\item{validate}{
logical indicating whether to use a validating parser or not, or in other words
check the contents against the DTD specification. If this is true, warning
messages will be displayed about errors in the DTD and/or document, but the parsing
will proceed except for the presence of terminal errors.
This is ignored when parsing an HTML document.
}
\item{getDTD}{
logical flag indicating whether the DTD (both internal and external)
should be returned along with the document nodes. This changes the
return type.
This is ignored when parsing an HTML document.
}
\item{isURL}{
indicates whether the \code{file} argument refers to a URL
(accessible via ftp or http) or a regular file on the system.
If \code{asText} is TRUE, this should not be specified.
The function attempts to determine whether the
data source is a URL by using \code{\link{grep}}
to look for http or ftp at the start of the string.
The libxml parser handles the connection to servers,
not the R facilities (e.g. \code{\link{scan}}).
}
\item{asTree}{this only applies when one passes a value for
the \code{handlers} argument and is used then to determine
whether the DOM tree should be returned or the \code{handlers}
object.
}
\item{addAttributeNamespaces}{a logical value indicating whether to
return the namespace in the names of the attributes within a node
or to omit them. If this is \code{TRUE}, an attribute such as
\code{xsi:type="xsd:string"} is reported with the name
\code{xsi:type}.
If it is \code{FALSE}, the name of the attribute is \code{type}.}
\item{useInternalNodes}{a logical value indicating whether
to call the converter functions with objects of class
\code{XMLInternalNode} rather than \code{XMLNode}.
This should make things faster as we do not convert the
contents of the internal nodes to R explicit objects.
Also, it allows one to access the parent and ancestor nodes.
However, since the objects refer to volatile C-level objects,
one cannot store these nodes for use in further computations within R.
They \dQuote{disappear} after the processing of the XML document is completed.
If this argument is \code{TRUE} and no handlers are provided, the
return value is a reference to the internal C-level document pointer.
This can be used to do post-processing via XPath expressions using
\code{\link{getNodeSet}}.
This is ignored when parsing an HTML document.
}
\item{isSchema}{a logical value indicating whether the document
is an XML schema (\code{TRUE}) and should be parsed as such using
the built-in schema parser in libxml.}
\item{fullNamespaceInfo}{a logical value indicating whether
to provide the namespace URI and prefix on each node
or just the prefix. The latter (\code{FALSE}) is
currently the default as that was the original way the
package behaved. However, using
\code{TRUE} is more informative and we will make this
the default in the future.
This is ignored when parsing an HTML document.
}
\item{encoding}{ a character string (scalar) giving the encoding for the
document. This is optional as the document should contain its own
encoding information. However, if it doesn't, the caller can specify
this for the parser. If the XML/HTML document does specify its own
encoding that value is used regardless of any value specified by the
caller. (That's just the way it goes!) So this is to be used
as a safety net in case the document does not have an encoding and
the caller happens to know the actual encoding.
}
\item{useDotNames}{a logical value
indicating whether to use the
newer format for identifying general element function handlers
with the '.' prefix, e.g. .text, .comment, .startElement.
If this is \code{FALSE}, then the older format
text, comment, startElement, ...
are used. This causes problems when there are indeed nodes
named text or comment or startElement, as
node-specific handlers are then confused with the corresponding
general handlers of the same name. Using \code{TRUE}
means that your list of handlers should have names that use
the '.' prefix for these general element handlers.
This is the preferred way to write new code.
}
\item{xinclude}{a logical value indicating whether
to process nodes of the form \code{<xi:include>}
to insert content from other parts of (potentially different)
documents. \code{TRUE} means resolve the external references;
\code{FALSE} means leave the node as is.
Of course, one can process these nodes oneself after the document has
been parsed using handler functions or working on the DOM.
Please note that the syntax for inclusion using XPointer
is not the same as XPath and the results can be a little
unexpected and confusing. See the libxml2 documentation for more details.
}
\item{addFinalizer}{a logical value indicating whether the
default finalizer routine should be registered to
free the internal xmlDoc when R no longer has a reference to this
external pointer object. This is only relevant when
\code{useInternalNodes} is \code{TRUE}.
}
\item{error}{a function that is invoked when the XML parser reports
an error.
When an error is encountered, this is called with 7 arguments.
See \code{\link{xmlStructuredStop}} for information about these.
If parsing completes and no document is generated, this function is
called again with only one argument which is a character vector of
length 0. This gives the function an opportunity to report all the
errors and raise an exception rather than doing this when it sees
the first one.
This function can do what it likes with the information.
It can raise an R error or let parser continue and potentially
find further errors.
The default value of this argument supplies a function that
cumulates the errors.
If this is \code{NULL}, the default error handler function in the
package \code{\link{xmlStructuredStop}} is invoked and this will
raise an error in R at that time.
}
\item{isHTML}{a logical value that allows this function to be used for parsing HTML documents.
This causes validation and processing of a DTD to be turned off.
This is currently experimental so that we can implement
\code{htmlParse} with this same function.}
\item{options}{an integer value or vector of values that are combined
(OR'ed) together
to specify options for the XML parser. This is the same as the
\code{options} parameter for \code{\link{xmlParseDoc}}.
}
\item{parentFirst}{a logical value for use when we have handler
functions and are traversing the tree.
This controls whether we process
the node before processing its children, or process the children
before their parent node.}
}
\details{
The \code{handlers} argument is used similarly
to those specified in \link{xmlEventParse}.
When an XML tag (element) is processed,
we look for a function in this collection
with the same name as the tag's name.
If this is not found, we look for one named
\code{startElement}. If this is not found, we use the default
built in converter.
The same works for comments, entity references, cdata, processing instructions,
etc.
The default entries should be named
\code{comment}, \code{startElement},
\code{externalEntity},
\code{processingInstruction},
\code{text}, \code{cdata} and \code{namespace}.
All but the last should take the XML node as their first argument.
In the future, other information may be passed via \dots,
for example, the depth in the tree, etc.
Specifically, the second argument will be the parent node into which they
are being added, but this is not currently implemented,
so should have a default value (\code{NULL}).
The \code{namespace} function is called with a single argument which
is an object of class \code{XMLNameSpace}. This contains
\describe{
\item{id}{the namespace identifier as used to
qualify tag names;}
\item{uri}{the value of the namespace identifier,
i.e. the URI
identifying the namespace.}
\item{local}{a logical value indicating whether the definition
is local to the document being parsed.}
}
One should note that the \code{namespace} handler is called before the
node in which the namespace definition occurs and its children are
processed. This is different than the other handlers which are called
after the child nodes have been processed.
Each of these functions can return arbitrary values that are then
entered into the tree in place of the default node passed to the
function as the first argument. This allows the caller to generate
the nodes of the resulting document tree exactly as they wish. If the
function returns \code{NULL}, the node is dropped from the resulting
tree. This is a convenient way to discard nodes having processed their
contents.
}
\value{
By default (when \code{useInternalNodes} is \code{FALSE},
\code{getDTD} is \code{TRUE}, and no
handler functions are provided), the return value is an object of
(S3) class \code{XMLDocument}.
This has two fields named \code{doc} and \code{dtd}
which are of class \code{XMLDocumentContent} and \code{DTDList} respectively.
If \code{getDTD} is \code{FALSE}, only the \code{doc} object is returned.
The \code{doc} object has three fields of its own:
\code{file}, \code{version} and \code{children}.
\item{\code{file}}{The (expanded) name of the file containing the XML.}
\item{\code{version}}{A string identifying the version of XML used by the document.}
\item{\code{children}}{
A list of the XML nodes at the top of the document.
Each of these is of class \code{XMLNode}.
These are made up of 4 fields.
\itemize{
\item{\code{name}}{The name of the element.}
\item{\code{attributes}}{For regular elements, a named list
of XML attributes converted from the \code{name="value"} pairs
in the element's start tag.
}
\item{\code{children}}{List of sub-nodes.}
\item{\code{value}}{Used only for text entries.}
}
Some node specializations of \code{XMLNode}, such as
\code{XMLComment}, \code{XMLProcessingInstruction} and
\code{XMLEntityRef}, are used.
If the value of the argument getDTD is TRUE and the document refers
to a DTD via a top-level DOCTYPE element, the DTD and its information
will be available in the \code{dtd} field. The second element is a
list containing the external and internal DTDs. Each of these
contains 2 lists - one for element definitions and another for entities. See
\code{\link{parseDTD}}.
If a list of functions is given via \code{handlers},
this list is returned. Typically, these handler functions
share state via a closure and the resulting updated data structures
which contain the extracted and processed values from the XML
document can be retrieved via a function in this handler list.
If \code{asTree} is \code{TRUE}, then the converted tree is returned.
What form this takes depends on what the handler functions have
done to process the XML tree.
If \code{useInternalNodes} is \code{TRUE} and no handlers are
specified, an object of S3 class \code{XMLInternalDocument} is
returned. This can be used in much the same ways as an
\code{XMLDocument}, e.g. with \code{\link{xmlRoot}},
\code{\link{docName}} and so on to traverse the tree.
It can also be used with XPath queries via \code{\link{getNodeSet}},
\code{\link{xpathApply}} and \code{doc["xpath-expression"]}.
If internal nodes are used and the internal tree returned directly,
all the nodes are returned as-is and no attempt to
trim white space, remove ``empty'' nodes (i.e. containing only white
space), etc. is done. This is potentially quite expensive and so is
not done generally, but should be done during the processing
of the nodes. When using XPath queries, such nodes are easily
identified and/or ignored and so do not cause any difficulties.
They do become an issue when dealing with a node's children
directly and so one can use simple filtering techniques such as
\code{ xmlChildren(node)[ ! xmlSApply(node, inherits, "XMLInternalTextNode")]}
and even check the \code{\link{xmlValue}} to determine if it contains only
white space.
\code{ xmlChildren(node)[ ! xmlSApply(node, function(x) inherits(x,
"XMLInternalTextNode") && trim(xmlValue(x)) == "")]}
} }
\references{\url{http://xmlsoft.org}, \url{http://www.w3.org/xml}}
\author{Duncan Temple Lang }
\note{Make sure that the necessary 3rd party libraries are available.}
\seealso{ \link{xmlEventParse},
\code{\link{free}} for releasing the memory when
an \code{XMLInternalDocument} object is returned.
}
\examples{
fileName <- system.file("exampleData", "test.xml", package="XML")
# parse the document and return it in its standard format.
xmlTreeParse(fileName)
# parse the document, discarding comments.
xmlTreeParse(fileName, handlers=list("comment"=function(x,...){NULL}), asTree = TRUE)
# print the entities
invisible(xmlTreeParse(fileName,
handlers=list(entity=function(x) {
cat("In entity",x$name, x$value,"\n")
x}
), asTree = TRUE
)
)
# Parse some XML text.
# Read the text from the file
xmlText <- paste(readLines(fileName), "\n", collapse="")
print(xmlText)
xmlTreeParse(xmlText, asText=TRUE)
# with version 1.4.2 we can pass the contents of an XML
# stream without pasting them.
xmlTreeParse(readLines(fileName), asText=TRUE)
# Read a MathML document and convert each node
# so that the primary class is
# <name of tag>MathML
# so that we can use method dispatching when processing
# it rather than conditional statements on the tag name.
# See plotMathML() in examples/.
fileName <- system.file("exampleData", "mathml.xml",package="XML")
m <- xmlTreeParse(fileName,
handlers=list(
startElement = function(node){
cname <- paste(xmlName(node),"MathML", sep="",collapse="")
class(node) <- c(cname, class(node));
node
}))
# In this example, we extract _just_ the names of the
# variables in the mtcars.xml file.
# The names are the contents of the <variable>
# tags. We discard all other tags by returning NULL
# from the startElement handler.
#
# We cumulate the names of variables in a character
# vector named `vars'.
# We define this within a closure and define the
# variable function within that closure so that it
# will be invoked when the parser encounters a
# <variable> tag.
# This is called with 2 arguments: the XMLNode object (containing
# its children) and the list of attributes.
# We get the variable name via call to xmlValue().
# Note that we define the closure function in the call and then
# create an instance of it by calling it directly as
# (function() {...})()
# Note that we can get the names by parsing
# in the usual manner and the entire document and then executing
# xmlSApply(xmlRoot(doc)[[1]], function(x) xmlValue(x[[1]]))
# which is simpler but is more costly in terms of memory.
fileName <- system.file("exampleData", "mtcars.xml", package="XML")
doc <- xmlTreeParse(fileName, handlers = (function() {
vars <- character(0) ;
list(variable=function(x, attrs) {
vars <<- c(vars, xmlValue(x[[1]]));
NULL},
startElement=function(x,attr){
NULL
},
names = function() {
vars
}
)
})()
)
# Here we just print the variable names to the console
# with a special handler.
doc <- xmlTreeParse(fileName, handlers = list(
variable=function(x, attrs) {
print(xmlValue(x[[1]])); TRUE
}), asTree=TRUE)
# This should raise an error.
try(xmlTreeParse(
system.file("exampleData", "TestInvalid.xml", package="XML"),
validate=TRUE))
\dontrun{
# Parse an XML document directly from a URL.
# Requires Internet access.
xmlTreeParse("http://www.omegahat.net/Scripts/Data/mtcars.xml", asText=TRUE)
}
counter = function() {
counts = integer(0)
list(startElement = function(node) {
name = xmlName(node)
if(name \%in\% names(counts))
counts[name] <<- counts[name] + 1
else
counts[name] <<- 1
},
counts = function() counts)
}
h = counter()
xmlParse(system.file("exampleData", "mtcars.xml", package="XML"), handlers = h)
h$counts()
f = system.file("examples", "index.html", package = "XML")
htmlTreeParse(readLines(f), asText = TRUE)
htmlTreeParse(readLines(f))
# Same as
htmlTreeParse(paste(readLines(f), collapse = "\n"), asText = TRUE)
getLinks = function() {
links = character()
list(a = function(node, ...) {
links <<- c(links, xmlGetAttr(node, "href"))
node
},
links = function()links)
}
h1 = getLinks()
htmlTreeParse(system.file("examples", "index.html", package = "XML"),
handlers = h1)
h1$links()
h2 = getLinks()
htmlTreeParse(system.file("examples", "index.html", package = "XML"),
handlers = h2, useInternalNodes = TRUE)
all(h1$links() == h2$links())
# Using flat trees
tt = xmlHashTree()
f = system.file("exampleData", "mtcars.xml", package="XML")
xmlTreeParse(f, handlers = list(.startElement = tt[[".addNode"]]))
xmlRoot(tt)
doc = xmlTreeParse(f, useInternalNodes = TRUE)
sapply(getNodeSet(doc, "//variable"), xmlValue)
#free(doc)
# character set encoding for HTML
f = system.file("exampleData", "9003.html", package = "XML")
# we specify the encoding
d = htmlTreeParse(f, encoding = "UTF-8")
# get a different result if we do not specify any encoding
d.no = htmlTreeParse(f)
# document with its encoding in the HEAD of the document.
d.self = htmlTreeParse(system.file("exampleData", "9003-en.html",package = "XML"))
# XXX want to do a test here to see the similarities between d and
# d.self and differences between d.no
# include
f = system.file("exampleData", "nodes1.xml", package = "XML")
xmlRoot(xmlTreeParse(f, xinclude = FALSE))
xmlRoot(xmlTreeParse(f, xinclude = TRUE))
f = system.file("exampleData", "nodes2.xml", package = "XML")
xmlRoot(xmlTreeParse(f, xinclude = TRUE))
# Errors
try(xmlTreeParse(" & < "))
# catch the error by type.
tryCatch(xmlTreeParse(" & < "),
"XMLParserErrorList" = function(e) {
cat("Errors in XML document\n", e$message, "\n")
})
# terminate on first error
try(xmlTreeParse(" & < ", error = NULL))
# see xmlErrorCumulator in the XML package
f = system.file("exampleData", "book.xml", package = "XML")
doc.trim = xmlInternalTreeParse(f, trim = TRUE)
doc = xmlInternalTreeParse(f, trim = FALSE)
xmlSApply(xmlRoot(doc.trim), class)
# note the additional XMLInternalTextNode objects
xmlSApply(xmlRoot(doc), class)
top = xmlRoot(doc)
textNodes = xmlSApply(top, inherits, "XMLInternalTextNode")
sapply(xmlChildren(top)[textNodes], xmlValue)
# Storing nodes
f = system.file("exampleData", "book.xml", package = "XML")
titles = list()
xmlTreeParse(f, handlers = list(title = function(x)
titles[[length(titles) + 1]] <<- x))
sapply(titles, xmlValue)
rm(titles)
}
\keyword{file}
\keyword{IO}
XML/man/xmlToS4.Rd 0000644 0001760 0000144 00000002516 13607575142 013334 0 ustar ripley users \name{xmlToS4}
\alias{xmlToS4}
\alias{xmlToS4,XMLInternalNode-method}
\title{General mechanism for mapping an XML node to an S4 object}
\description{
This generic function and its methods recursively process
an XML node and its child nodes (and theirs, and so on)
to map the nodes to S4 objects.
This is the run-time function that corresponds to the
\code{\link{makeClassTemplate}} function.
}
\usage{
xmlToS4(node, obj = new(xmlName(node)), ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{node}{the top-level XML node to convert to an S4 object}
\item{obj}{the object whose slots are to be filled from the
information in the XML node}
\item{\dots}{additional parameters for methods}
}
\value{
The object \code{obj} whose slots have been modified.
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{makeClassTemplate}}
}
\examples{
# A small document containing a single part element whose children
# correspond to the slots of the "part" class defined below.
# The name of the enclosing root element is arbitrary.
txt = paste0("<doc><part><name>ABC</name><type>XYZ</type>",
             "<cost>3.54</cost><status>available</status></part></doc>")
doc = xmlParse(txt)
# The class name matches the node name, so the default value of the
# obj argument, new(xmlName(node)), creates an instance of this class.
setClass("part", representation(name = "character",
                                type = "character",
                                cost = "numeric",
                                status= "character"))
xmlToS4(xmlRoot(doc)[["part"]])
}
\keyword{programming}
\keyword{IO}
\concept{meta-computing}
XML/man/xmlEventHandler.Rd 0000644 0001760 0000144 00000002572 12665242441 015120 0 ustar ripley users \name{xmlEventHandler}
\alias{xmlEventHandler}
\title{Default handlers for the SAX-style event XML parser}
\description{
This is a function that returns a closure instance
containing the default handlers for use with
\code{\link{xmlEventParse}} for parsing XML documents
via the SAX-style parsing.
}
\usage{
xmlEventHandler()
}
\details{
These handlers simply build up the DOM tree and thus
perform the same job as \code{xmlTreeParse}.
It is here more as an example, reference and a base
that users can extend.
}
\value{
The return value is a list of functions
which are used as callbacks by the internal XML parser
when it encounters certain XML elements/structures.
These include items such as the start of an element,
end of an element, processing instruction,
text node, comment, entity references and definitions, etc.
\item{startElement}{}
\item{endElement}{}
\item{processingInstruction}{}
\item{text}{}
\item{comment}{}
\item{externalEntity}{}
\item{entityDeclaration}{}
\item{cdata}{}
\item{dom}{}
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlEventParse}}
\code{\link{xmlTreeParse}}
}
\examples{
xmlEventParse(system.file("exampleData", "mtcars.xml", package="XML"),
handlers=xmlEventHandler())
}
\keyword{file}
\keyword{IO}
XML/man/catalogResolve.Rd 0000644 0001760 0000144 00000005350 12665242441 014767 0 ustar ripley users \name{catalogResolve}
\alias{catalogResolve}
\title{Look up an element via the XML catalog mechanism}
\description{
XML parsers use a catalog to map generic system and public addresses
to actual local files or potentially different remote files.
We can use a catalog to map a reference such as
\code{http://www.omegahat.net/XSL/} to a particular
directory on our local machine and then not have to
modify any of the documents if we move the local files to another
directory, e.g. install a new version in an alternate directory.
This function provides a mechanism to query the catalog to
resolve a URI, PUBLIC or SYSTEM identifier.
This is now vectorized, so accepts a character vector of
URIs and recycles \code{type} to have the same length.
If an entry is not resolved via the catalog system,
a \code{NA} is returned for that element.
To leave the value unaltered in this case, use \code{asIs = TRUE}.
}
\usage{
catalogResolve(id, type = "uri", asIs = FALSE, debug = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{id}{the name of the (generic) element to be resolved}
\item{type}{a string, specifying whether the lookup is for a uri,
system or public element}
\item{asIs}{a logical. If \code{TRUE} any element of \code{id} which
is not resolved by the catalog system will be left as given in the
call. If \code{FALSE}, such unresolved elements are identified
by \code{NA}.
}
\item{debug}{logical value indicating whether to turn on debugging
output written to the console (\code{TRUE}) or not (\code{FALSE}).}
}
\value{
A character vector with one element for each element of \code{id}.
If an element was resolved, the corresponding value is the resolved location.
Otherwise, it is \code{NA}, or the original value if \code{asIs = TRUE}.
}
\references{
\url{http://www.xmlsoft.org}
\url{http://www.sagehill.net/docbookxsl/Catalogs.html} provides a short, succinct tutorial on catalogs.
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlTreeParse}}
}
\examples{
if(!exists("Sys.setenv")) Sys.setenv = Sys.putenv
Sys.setenv("XML_CATALOG_FILES" = system.file("exampleData", "catalog.xml", package = "XML"))
catalogResolve("-//OASIS//DTD DocBook XML V4.4//EN", "public")
catalogResolve("http://www.omegahat.net/XSL/foo.xsl")
catalogResolve("http://www.omegahat.net/XSL/article.xsl", "uri")
catalogResolve("http://www.omegahat.net/XSL/math.xsl", "uri")
# This one does not resolve anything, returning an empty value.
catalogResolve("http://www.oasis-open.org/docbook/xml/4.1.2/foo.xsl", "uri")
# Vectorized: with asIs = TRUE the unresolved first element is left as
# given, and the second resolves to /tmp/html.xsl.
catalogAdd("http://made.up.domain", "/tmp")
catalogResolve(c("ddas", "http://made.up.domain/html.xsl"), asIs = TRUE)
}
\keyword{IO}
\concept{XML}
XML/man/getNodeSet.Rd 0000644 0001760 0000144 00000047477 13610036162 014066 0 ustar ripley users \name{getNodeSet}
\alias{getNodeSet}
\alias{xpathApply}
\alias{xpathSApply}
\alias{matchNamespaces}
\title{Find matching nodes in an internal XML tree/DOM}
\description{
These functions provide a way to find XML nodes that match a particular
criterion. It uses the XPath syntax and allows very powerful
expressions to identify nodes of interest within a document both
clearly and efficiently. The XPath language requires some
knowledge, but tutorials are available on the Web and in books.
XPath queries can result in different types of values such as numbers,
strings, and node sets. It allows simple identification of nodes
by name, by path (i.e. hierarchies or sequences of
node-child-child...), with a particular attribute or matching
a particular attribute with a given value. It also supports
functionality for navigating nodes in the tree within a query
(e.g. \code{ancestor()}, \code{child()}, \code{self()}),
and also for manipulating the content of one or more nodes
(e.g. \code{text}).
And it allows for criteria identifying nodes by position, etc.
using some counting operations. Combining XPath with R
allows for quite flexible node identification and manipulation.
XPath offers an alternative way to find nodes of interest
than recursively or iteratively navigating the entire tree in R
and performing the navigation explicitly.
One can search an entire document or start the search from a
particular node. Such node-based searches can even search up the tree
as well as within the sub-tree that the node parents. Node specific
XPath expressions are typically started with a "." to indicate the
search is relative to that node.
You can use several XPath 2.0 functions in the XPath
query. Furthermore, you can also register additional XPath
functions that are implemented either with R functions or C routines.
(See \code{xpathFuns}.)
The set of matching nodes corresponding to an XPath expression
are returned in R as a list. One can then iterate over these elements to process the
nodes in whatever way one wants. Unfortunately, this involves two loops -
one in the XPath query over the entire tree, and another in R.
Typically, this is fine as the number of matching nodes is reasonably small.
However, if repeating this on numerous files, speed may become an issue.
We can avoid the second loop (i.e. the one in R) by applying a function to each node
before it is returned to R as part of the node set. The result of the function
call is then returned, rather than the node itself.
One can provide an R expression rather than an R function for \code{fun}. This is expected to be a call
and the first argument of the call will be replaced with the node.
Dealing with expressions that relate to the default namespaces in the
XML document can be confusing.
\code{xpathSApply} is a version of \code{xpathApply}
which attempts to simplify the result if it can be converted
to a vector or matrix rather than left as a list.
In this way, it has the same relationship to \code{xpathApply}
as \code{\link[base]{sapply}} has to \code{\link[base]{lapply}}.
\code{matchNamespaces} is a separate function that is used to
facilitate
specifying the mappings from namespace prefix used in the
XPath expression and their definitions, i.e. URIs,
and connecting these with the namespace definitions in the
target XML document in which the XPath expression will be evaluated.
\code{matchNamespaces} uses rules that are very slightly awkward or
specifically involve a special case. This is because this mapping of
namespaces from XPath to XML targets is difficult, involving
prefixes in the XPath expression, definitions in the XPath evaluation
context and matches of URIs with those in the XML document.
The function aims to avoid having to specify all the prefix=uri pairs
by using "sensible" defaults and also matching the prefixes in the
XPath expression to the corresponding definitions in the XML
document.
The rules are as follows.
\code{namespaces} is a character vector. Any element that has a
non-trivial name (i.e. other than "") is left as is and the name
and value define the prefix = uri mapping.
Any elements that have a trivial name (i.e. no name at all or "")
are resolved by first matching the prefix to those of the defined
namespaces anywhere within the target document, i.e. in any node and
not just the root one.
If there is no match for the first element of the \code{namespaces}
vector, this is treated specially and is mapped to the
default namespace of the target document. If there is no default
namespace defined, an error occurs.
It is best to give the argument explicitly in the form
\code{c(prefix = uri, prefix = uri)}.
However, one can use the same namespace prefixes as in the document
if one wants. And one can use an arbitrary namespace prefix
for the default namespace URI of the target document provided it is
the first element of \code{namespaces}.
See the 'Details' section below for some more information.
}
\usage{
getNodeSet(doc, path, namespaces = xmlNamespaceDefinitions(doc, simplify = TRUE),
fun = NULL, sessionEncoding = CE_NATIVE, addFinalizer = NA, ...)
xpathApply(doc, path, fun, ... ,
namespaces = xmlNamespaceDefinitions(doc, simplify = TRUE),
resolveNamespaces = TRUE, addFinalizer = NA, xpathFuns = list())
xpathSApply(doc, path, fun = NULL, ... ,
namespaces = xmlNamespaceDefinitions(doc, simplify = TRUE),
resolveNamespaces = TRUE, simplify = TRUE,
addFinalizer = NA)
matchNamespaces(doc, namespaces,
nsDefs = xmlNamespaceDefinitions(doc, recursive = TRUE, simplify = FALSE),
defaultNs = getDefaultNamespace(doc, simplify = TRUE))
}
\arguments{
\item{doc}{an object of class \code{XMLInternalDocument}}
\item{path}{a string (character vector of length 1) giving the
XPath expression to evaluate.}
\item{namespaces}{ a named character vector giving the
namespace prefix and URI pairs that are to be used
in the XPath expression and matching of nodes.
The prefix is just a simple string that acts as a short-hand
or alias for the URI that is the unique identifier for the
namespace.
The URI is the element in this vector and the prefix is the
corresponding element name.
One only needs to specify the namespaces in the XPath expression and
for the nodes of interest rather than requiring all the
namespaces for the entire document.
Also note that the prefix used in this vector is local only to the
path. It does not have to be the same as the prefix used in the
document to identify the namespace. However, the URI in this
argument must be identical to the target namespace URI in the
document. It is the namespace URIs that are matched (exactly)
to find correspondence. The prefixes are used only to refer to
that URI.
}
\item{fun}{a function object, or an expression or call, which is used when the result is a node set
and evaluated for each node element in the node set. If this is a call, the first argument is replaced
with the current node.
}
\item{...}{any additional arguments to be passed to \code{fun} for each
node in the node set.}
\item{resolveNamespaces}{a logical value indicating whether
to process the collection of namespaces and resolve those that have
no name by looking in the default namespace and the namespace
definitions within the target document to match by prefix.}
\item{nsDefs}{a list giving the namespace definitions in which to match
any prefixes. This is typically computed directly from the target
document and the default value is most appropriate.}
\item{defaultNs}{the default namespace prefix-URI mapping given as a
named character vector. This is not a namespace definition object.
This is used when matching a simple prefix that has no corresponding
entry in \code{nsDefs} and is the first element in the
\code{namespaces} vector.
}
\item{simplify}{a logical value indicating whether the function
should attempt to perform the simplification of the result
into a vector rather than leaving it as a list.
This is the same as \code{\link[base]{sapply}} does
in comparison to \code{\link[base]{lapply}}.
}
%XXX
\item{sessionEncoding}{experimental functionality and parameter related
to encoding.}
\item{addFinalizer}{a logical value or identifier for a C routine
that controls whether we register finalizers on the internal
node.}
\item{xpathFuns}{a list containing either character strings, functions
or named elements containing the address of a C routine.
These identify functions that can be used in the XPath expression.
A character string identifies the name of the XPath function and the
R function of the same name (and located on the R search path).
A C routine to implement an XPath function is specified via a call
to \code{\link[base]{getNativeSymbolInfo}} and passing just the
address field. This is provided in the \code{list()} with a name which is
used as the name of the XPath function.
}
}
\details{
When a namespace is defined on a node in the XML document,
an XPath expression must use a namespace, even if it is the default
namespace for the XML document/node.
For example, suppose we have an XML document
\code{<help xmlns="http://www.r-project.org/Rd"><topic>...</topic></help>}
To find all the topic nodes, we might want to use
the XPath expression \code{"/help/topic"}.
However, we must use an explicit namespace prefix that is associated
with the URI \code{http://www.r-project.org/Rd} corresponding to the one in
the XML document.
So we would use
\code{getNodeSet(doc, "/r:help/r:topic", c(r = "http://www.r-project.org/Rd"))}.
As described above, the functions attempt to allow
the namespaces to be specified easily by the R user
and matched to the namespace definitions in the
target document.
This calls the libxml routine \code{xmlXPathEval}.
}
\value{
The results can currently be different
based on the returned value from the XPath expression evaluation:
\item{list}{a node set}
\item{numeric}{a number}
\item{logical}{a boolean}
\item{character}{a string, i.e. a single character element.}
If \code{fun} is supplied and the result of the XPath query is a node set,
the result in R is a list.
}
\references{\url{http://xmlsoft.org},
\url{http://www.w3.org/xml}
\url{http://www.w3.org/TR/xpath}
\url{http://www.omegahat.net/RSXML}
}
\author{Duncan Temple Lang }
\note{
In order to match nodes in the default name space for
documents with a non-trivial default namespace, e.g. given as
\code{xmlns="http://www.omegahat.net"}, you will need to use a prefix
for the default namespace in this call.
When specifying the namespaces, give a name - any name - to the
default namespace URI and then use this as the prefix in the
XPath expression, e.g.
\code{getNodeSet(d, "//d:myNode", c(d = "http://www.omegahat.net"))}
to match myNode in the default name space
\code{http://www.omegahat.net}.
This default namespace of the document is now computed for us and
is the default value for the namespaces argument.
It can be referenced using the prefix 'd',
standing for default but sufficiently short to be
easily used within the XPath expression.
More of the XPath functionality provided by libxml can and may be
made available to the R package.
Facilities such as compiled XPath expressions, functions, ordered node
information are examples.
Please send requests to the package maintainer.
}
\seealso{
\code{\link{xmlTreeParse}} with \code{useInternalNodes} as \code{TRUE}.
}
\examples{
doc = xmlParse(system.file("exampleData", "tagnames.xml", package = "XML"))
els = getNodeSet(doc, "/doc//a[@status]")
sapply(els, function(el) xmlGetAttr(el, "status"))
# use of namespaces on an attribute.
getNodeSet(doc, "/doc//b[@x:status]", c(x = "http://www.omegahat.net"))
getNodeSet(doc, "/doc//b[@x:status='foo']", c(x = "http://www.omegahat.net"))
# Because we know the namespace definitions are on /doc/a
# we can compute them directly and use them.
nsDefs = xmlNamespaceDefinitions(getNodeSet(doc, "/doc/a")[[1]])
ns = structure(sapply(nsDefs, function(x) x$uri), names = names(nsDefs))
getNodeSet(doc, "/doc//b[@omegahat:status='foo']", ns)[[1]]
# free(doc)
#####
f = system.file("exampleData", "eurofxref-hist.xml.gz", package = "XML")
e = xmlParse(f)
ans = getNodeSet(e, "//o:Cube[@currency='USD']", "o")
sapply(ans, xmlGetAttr, "rate")
# or equivalently
ans = xpathApply(e, "//o:Cube[@currency='USD']", xmlGetAttr, "rate", namespaces = "o")
# free(e)
# Using a namespace
f = system.file("exampleData", "SOAPNamespaces.xml", package = "XML")
z = xmlParse(f)
getNodeSet(z, "/a:Envelope/a:Body", c("a" = "http://schemas.xmlsoap.org/soap/envelope/"))
getNodeSet(z, "//a:Body", c("a" = "http://schemas.xmlsoap.org/soap/envelope/"))
# free(z)
# Get two items back with namespaces
f = system.file("exampleData", "gnumeric.xml", package = "XML")
z = xmlParse(f)
getNodeSet(z, "//gmr:Item/gmr:name", c(gmr="http://www.gnome.org/gnumeric/v2"))
#free(z)
#####
# European Central Bank (ECB) exchange rate data
# Data is available from "http://www.ecb.int/stats/eurofxref/eurofxref-hist.xml"
# or locally.
uri = system.file("exampleData", "eurofxref-hist.xml.gz", package = "XML")
doc = xmlParse(uri)
# The default namespace for all elements is given by
namespaces <- c(ns="http://www.ecb.int/vocabulary/2002-08-01/eurofxref")
# Get the data for Slovenian currency for all time periods.
# Find all the nodes of the form <Cube currency="SIT" rate="..."/>
slovenia = getNodeSet(doc, "//ns:Cube[@currency='SIT']", namespaces )
# Now we have a list of such nodes, loop over them
# and get the rate attribute
rates = as.numeric( sapply(slovenia, xmlGetAttr, "rate") )
# Now put the date on each element
# find nodes of the form <Cube time="...">
# and extract the time attribute
names(rates) = sapply(getNodeSet(doc, "//ns:Cube[@time]", namespaces ),
xmlGetAttr, "time")
# Or we could turn these into dates with strptime()
strptime(names(rates), "\%Y-\%m-\%d")
# Using xpathApply, we can do
rates = xpathApply(doc, "//ns:Cube[@currency='SIT']",
xmlGetAttr, "rate", namespaces = namespaces )
rates = as.numeric(unlist(rates))
# Using an expression rather than a function and ...
rates = xpathApply(doc, "//ns:Cube[@currency='SIT']",
quote(xmlGetAttr(x, "rate")), namespaces = namespaces )
#free(doc)
#
uri = system.file("exampleData", "namespaces.xml", package = "XML")
d = xmlParse(uri)
getNodeSet(d, "//c:c", c(c="http://www.c.org"))
getNodeSet(d, "/o:a//c:c", c("o" = "http://www.omegahat.net", "c" = "http://www.c.org"))
# since http://www.omegahat.net is the default namespace, we can
# just use the prefix "o" to map to that.
getNodeSet(d, "/o:a//c:c", c("o", "c" = "http://www.c.org"))
# the following, perhaps unexpectedly but correctly, returns an empty
# node set with no matches
getNodeSet(d, "//defaultNs", "http://www.omegahat.net")
# But if we create our own prefix for the evaluation of the XPath
# expression and use this in the expression, things work as one
# might hope.
getNodeSet(d, "//dummy:defaultNs", c(dummy = "http://www.omegahat.net"))
# And since the default value for the namespaces argument is the
# default namespace of the document, we can refer to it with our own
# prefix given as
getNodeSet(d, "//d:defaultNs", "d")
# And the syntactic sugar is
d["//d:defaultNs", namespace = "d"]
# this illustrates how we can use the prefixes in the XML document
# in our query and let getNodeSet() and friends map them to the
# actual namespace definitions.
# "o" is used to represent the default namespace for the document
# i.e. http://www.omegahat.net, and "r" is mapped to the same
# definition that has the prefix "r" in the XML document.
tmp = getNodeSet(d, "/o:a/r:b/o:defaultNs", c("o", "r"))
xmlName(tmp[[1]])
#free(d)
# Work with the nodes and their content (not just attributes) from the node set.
# From bondsTables.R in examples/
\dontrun{## fails to download as from May 2017
doc =
htmlTreeParse("http://finance.yahoo.com/bonds/composite_bond_rates?bypass=true",
useInternalNodes = TRUE)
if(is.null(xmlRoot(doc)))
doc = htmlTreeParse("http://finance.yahoo.com/bonds?bypass=true",
useInternalNodes = TRUE)
# Use XPath expression to find the nodes
# ..
# as these are the ones we want.
if(!is.null(xmlRoot(doc))) {
o = getNodeSet(doc, "//div/table[@class='yfirttbl']")
}
# Write a function that will extract the information out of a given table node.
readHTMLTable =
function(tb)
{
  # Column labels are taken from the th cells of the header row.
  header = sapply(tb[["thead"]][["tr"]]["th"], xmlValue)
  # Text of the td cells for each tr in the table body; the first entry
  # for each row is used as that row's label.
  cells = sapply(tb[["tbody"]]["tr"], function(row) sapply(row["td"], xmlValue))
  rowLabels = cells[1,]
  # The remaining entries are converted to numbers and laid out with
  # one matrix row per table row.
  matrix(as.numeric(cells[-1,]),
         nrow = ncol(cells),
         dimnames = list(rowLabels, header[-1]),
         byrow = TRUE)
}
# Now process each of the table nodes in the o list.
tables = lapply(o, readHTMLTable)
names(tables) = lapply(o, function(x) xmlValue(x[["caption"]]))
}
# this illustrates an approach to doing queries on a sub tree
# within the document.
# Note that there is a memory leak incurred here as we create a new
# XMLInternalDocument in the getNodeSet().
f = system.file("exampleData", "book.xml", package = "XML")
doc = xmlParse(f)
ch = getNodeSet(doc, "//chapter")
xpathApply(ch[[2]], "//section/title", xmlValue)
# To fix the memory leak, we explicitly create a new document for
# the subtree, perform the query and then free it _when_ we are done
# with the resulting nodes.
subDoc = xmlDoc(ch[[2]])
xpathApply(subDoc, "//section/title", xmlValue)
free(subDoc)
txt =
''
doc = xmlInternalTreeParse(txt, asText = TRUE)
\dontrun{
# Will fail because it doesn't know what the namespace x is
# and we have to have one even though it has no prefix in the document.
xpathApply(doc, "//x:b")
}
# So this is how we do it - just say x is to be mapped to the
# default unprefixed namespace which we shall call x!
xpathApply(doc, "//x:b", namespaces = "x")
# Here r is mapped to the corresponding definition in the document.
xpathApply(doc, "//r:a", namespaces = "r")
# Here, xpathApply figures this out for us, but will raise a warning.
xpathApply(doc, "//r:a")
# And here we use our own binding.
xpathApply(doc, "//x:a", namespaces = c(x = "http://www.r-project.org"))
# Get all the nodes in the entire tree.
table(unlist(sapply(doc["//*|//text()|//comment()|//processing-instruction()"],
class)))
## Use of XPath 2.0 functions min() and max()
doc = xmlParse('')
getNodeSet(doc, "//p[@age = min(//p/@age)]")
getNodeSet(doc, "//p[@age = max(//p/@age)]")
avg = function(...) {
  # Flatten the arguments, convert them to numbers and take the mean.
  values = as.numeric(unlist(...))
  mean(values)
}
getNodeSet(doc, "//p[@age > avg(//p/@age)]", xpathFuns = "avg")
doc = xmlParse('')
getNodeSet(doc, "//ev[month-from-date(@date) > 7]",
xpathFuns = list("month-from-date" =
function(node) {
match(months(as.Date(as.character(node[[1]]))), month.name)
}))
}
\keyword{file}
\keyword{IO}
XML/man/saveXML.Rd 0000644 0001760 0000144 00000013466 13476127063 013346 0 ustar ripley users \name{saveXML}
\alias{saveXML}
\alias{saveXML.XMLInternalDocument}
\alias{saveXML.XMLInternalDOM}
\alias{saveXML.XMLInternalNode}
\alias{saveXML.XMLNode}
\alias{saveXML.XMLOutputStream}
\alias{coerce,XMLInternalDocument,character-method}
\alias{coerce,XMLInternalDOM,character-method}
\alias{coerce,XMLInternalNode,character-method}
\alias{saveXML,XMLFlatTree-method}
\alias{saveXML,XMLInternalDocument-method}
\alias{saveXML,XMLInternalDOM-method}
\alias{saveXML,XMLInternalNode-method}
\alias{saveXML,XMLNode-method}
\alias{saveXML,XMLOutputStream-method}
\alias{saveXML,HTMLInternalDocument-method}
\title{Output internal XML Tree}
\description{
Methods for writing the representation of an XML tree to a string or
file.
Originally this was intended to be used only for
DOMs (Document Object Models) stored in internal memory
created via \code{\link{xmlTree}}, but methods for
\code{XMLNode}, \code{XMLInternalNode} and \code{XMLOutputStream}
objects
(and others)
allow it to be generic for different representations of the
XML tree.
Note that the indentation when writing an internal C-based node
(XMLInternalNode) may not be as expected if there are text nodes
within the node.
Also, not all the parameters are meaningful for all methods.
For example, compressing when writing to a string is not
supported.
}
\usage{
saveXML(doc, file=NULL, compression=0, indent=TRUE, prefix = '\n',
doctype = NULL, encoding = getEncoding(doc), ...)
\method{saveXML}{XMLInternalDocument}(doc, file=NULL, compression=0, indent=TRUE, prefix = '\n',
doctype = NULL, encoding = getEncoding(doc), ...)
\method{saveXML}{XMLInternalDOM}(doc, file=NULL, compression=0, indent=TRUE, prefix = '\n',
doctype = NULL, encoding = getEncoding(doc), ...)
\method{saveXML}{XMLNode}(doc, file=NULL, compression=0, indent=TRUE, prefix = '\n',
doctype = NULL, encoding = getEncoding(doc), ...)
\method{saveXML}{XMLOutputStream}(doc, file=NULL, compression=0, indent=TRUE, prefix = '\n',
doctype = NULL, encoding = getEncoding(doc), ...)
}
\arguments{
\item{doc}{the document object representing the XML document.}
\item{file}{the name of the file to which the contents of the XML
nodes will be serialized.}
\item{compression}{an integer value between 0 and 9 indicating the
level of compression to use when saving the file. Higher values
indicate increased compression and hence smaller files
at the expense of computational time to do the compression and decompression.}
\item{indent}{a logical value indicating whether to indent
the nested nodes when serializing to the stream.}
\item{prefix}{a string that is written to the stream/connection before
the XML is output. If this is NULL, it is ignored. This allows us to
put the XML introduction/preamble at the beginning of the document
while allowing it to be omitted when we are outputting multiple
"documents" within a single stream.}
\item{doctype}{an object identifying the elements for the DOCTYPE in the output.
This can be a string or an object of class \code{Doctype}.}
\item{encoding}{a string indicating which encoding style to use. This
is currently ignored except in the method in \code{Sxslt} for saving a
document generated by applying an XSL style sheet to an XML document.}
\item{\dots}{extra parameters for specific methods}
}
\details{
One can create an internal XML tree (or DOM)
using \code{\link{newXMLDoc}} and \code{\link{newXMLNode}}.
\code{saveXML} allows one to generate a textual representation of
that DOM in human-readable and reusable XML format.
\code{saveXML} is a generic function that allows one to call
the rendering operation with either the top-level node
of the DOM or of the document object (of class \code{XMLInternalDocument})
that is used to
accumulate the nodes and with which the developer
adds nodes.
}
\value{
If \code{file} is not specified, the result is a character string containing
the resulting XML content.
If \code{file} is passed in the call, the XML content is written to that file.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.omegahat.net/RSXML}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{newXMLDoc}}
\code{\link{newXMLNode}}
\code{\link{xmlOutputBuffer}}
\code{\link{xmlOutputDOM}}
}
\examples{
b = newXMLNode("bob")
saveXML(b)
f = tempfile()
saveXML(b, f)
doc = xmlInternalTreeParse(f)
saveXML(doc)
con <- xmlOutputDOM()
con$addTag("author", "Duncan Temple Lang")
con$addTag("address", close=FALSE)
con$addTag("office", "2C-259")
con$addTag("street", "Mountain Avenue.")
con$addTag("phone", close=FALSE)
con$addTag("area", "908", attrs=c(state="NJ"))
con$addTag("number", "582-3217")
con$closeTag() # phone
con$closeTag() # address
saveXML(con$value(), file=file.path(tempdir(), "out.xml"))
# Work with entities
f = system.file("exampleData", "test1.xml", package = "XML")
doc = xmlRoot(xmlTreeParse(f))
outFile = tempfile()
saveXML(doc, outFile)
alt = xmlRoot(xmlTreeParse(outFile))
if(! identical(doc, alt) )
stop("Problems handling entities!")
con = textConnection("test1.xml", "w")
saveXML(doc, con)
close(con)
alt = get("test1.xml")
identical(doc, alt)
x = newXMLNode("a", "some text", newXMLNode("c", "sub text"), "more text")
cat(saveXML(x), "\n")
cat(as(x, "character"), "\n")
# Showing the prefix parameter
doc = newXMLDoc()
n = newXMLNode("top", doc = doc)
b = newXMLNode("bar", parent = n)
# suppress the <?xml ...?> declaration
saveXML(doc, prefix = character())
# put our own comment in
saveXML(doc, prefix = "")
# or use a comment node.
saveXML(doc, prefix = newXMLCommentNode("This is an alternative prefix"))
}
\keyword{IO}
\keyword{file}
XML/man/xmlSearchNs.Rd 0000644 0001760 0000144 00000002461 12030412355 014231 0 ustar ripley users \name{xmlSearchNs}
\alias{xmlSearchNs}
\alias{coerce,XMLNamespaceRef,character-method}
\title{Find a namespace definition object by searching ancestor nodes}
\description{
This function allows one to search an XML tree from a particular node
and find the namespace definition for a given namespace prefix or URL.
This namespace definition can then be used to set it on a node to
make it the effective namespace for that node.
}
\usage{
xmlSearchNs(node, ns, asPrefix = TRUE, doc = as(node, "XMLInternalDocument"))
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{node}{an \code{XMLInternalElementNode}}
\item{ns}{a character string (vector of length 1).
If \code{asPrefix} is \code{TRUE}, this is the namespace
alias/prefix.
If \code{asPrefix} is \code{FALSE}, this is the URL of the namespace definition}
\item{asPrefix}{a logical value. See \code{ns}.}
\item{doc}{the XML document in which the node(s) are located}
}
\value{
An object of class XMLNamespaceRef.
}
\references{
libxml2
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{newXMLNode}}
}
\examples{
txt = '<top xmlns:r="http://www.r-project.org"><section><bottom/></section></top>'
doc = xmlParse(txt)
bottom = xmlRoot(doc)[[1]][[1]]
xmlSearchNs(bottom, "r")
}
\keyword{programming}
\keyword{data}
XML/man/readHTMLList.Rd 0000644 0001760 0000144 00000003500 13427010054 014232 0 ustar ripley users \name{readHTMLList}
\alias{readHTMLList}
\alias{readHTMLList,HTMLInternalDocument-method}
\alias{readHTMLList,XMLInternalNode-method}
\alias{readHTMLList,character-method}
\title{Read data in an HTML list or all lists in a document}
\description{
This function and its methods are somewhat similar to
\code{\link{readHTMLTable}} but read the contents of
lists in an HTML document.
We can specify the URL of the document or
an already parsed document or an individual node within the document.
}
\usage{
readHTMLList(doc, trim = TRUE, elFun = xmlValue, which = integer(), ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{doc}{the URL of the document or the parsed HTML document
or an individual node.}
\item{trim}{a logical value indicating whether we should
remove leading and trailing white space in each list item when
returning it}
\item{elFun}{a function that is used to process each list item node
(\code{li}).
This provides an opportunity to customize how each node is processed,
for example accessing attributes on the list item or on its contents
such as links in the items.}
\item{which}{an index or name, or a vector of these, identifying
which list nodes to process in the overall document. This is for
subsetting particular lists rather than processing them all.}
\item{\dots}{additional arguments passed to \code{\link{htmlParse}}
and for the specific methods.}
}
\value{
A list of character vectors or lists,
with one element for each list in the document.
If only one list is being read (by specifying \code{which} as a single
identifier), that is returned as is.
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{readHTMLTable}}
}
\examples{\donttest{
readHTMLList("http://www.omegahat.net")
}}
\keyword{IO}
\keyword{programming}
XML/man/Doctype-class.Rd 0000644 0001760 0000144 00000002706 12160531242 014517 0 ustar ripley users \name{Doctype-class}
\docType{class}
\alias{Doctype-class}
\title{Class to describe a reference to an XML DTD}
\description{This class is intended to identify a DTD by SYSTEM file and/or PUBLIC
catalog identifier. This is used in the DOCTYPE element of an XML document.}
\section{Objects from the Class}{
Objects can be created by calls to the constructor function \code{\link{Doctype}}.
}
\section{Slots}{
\describe{
\item{\code{name}:}{Object of class \code{"character"}. This is the name of the
top-level element in the XML document.}
\item{\code{system}:}{Object of class \code{"character"}. This is the name of the file on the
system where the DTD document can be found. Can this be a URI?}
\item{\code{public}:}{Object of class \code{"character"}. This gives the PUBLIC
identifier for the DTD that can be searched for in a catalog, for example to map the
DTD reference to a local system element.}
}
}
\section{Methods}{
There is a constructor function
and also methods for \code{\link[methods]{coerce}} to convert an object
of this class to a character.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.xmlsoft.org}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{Doctype}}
\code{\link{saveXML}}
}
\examples{
d = Doctype(name = "section",
public = c("-//OASIS//DTD DocBook XML V4.2//EN",
"http://oasis-open.org/docbook/xml/4.2/docbookx.dtd"))
}
\keyword{classes}
XML/man/toHTML.Rd 0000644 0001760 0000144 00000002544 11741563530 013125 0 ustar ripley users \name{toHTML}
\alias{toHTML}
\alias{toHTML,vector-method}
\alias{toHTML,matrix-method}
\alias{toHTML,call-method}
\title{Create an HTML representation of the given R object, using
internal C-level nodes}
\description{
This generic function and the associated methods are
intended to create an HTML tree that represents the
R object in some intelligent manner.
For example, we represent a vector as a table
and we represent a matrix also as a table.
}
\usage{
toHTML(x, context = NULL)
}
\arguments{
\item{x}{ the R object which is to be represented via an HTML tree }
\item{context}{an object which provides context in which the node will
be used. This is currently arbitrary. It may be used, for example,
when creating HTML for R documentation and providing information
about variables and functions that are available on that page
and so have internal links.
}
}
\details{
It would be nicer if we could pass additional arguments
to control whether the outer/parent layer is created,
e.g. when reusing code for a vector for a row of a matrix.
}
\value{
an object of class \code{XMLInternalNode}
}
%\references{ }
\author{Duncan Temple Lang}
\seealso{
The \code{R2HTML} package.
}
\examples{
cat(as(toHTML(rnorm(10)), "character"))
}
\keyword{IO}
\keyword{programming}
\concept{XML}
\concept{serialization}
\concept{data exchange}
XML/man/length.XMLNode.Rd 0000644 0001760 0000144 00000001605 12665242441 014542 0 ustar ripley users \name{length.XMLNode}
\alias{length.XMLNode}
\title{Determine the number of children in an XMLNode object.}
\description{
This function is a simple way to compute the number
of sub-nodes (or children) an \code{XMLNode} object
possesses.
It is provided as a convenient form of calling the
\code{\link{xmlSize}} function.
}
\usage{
\method{length}{XMLNode}(x)
}
\arguments{
\item{x}{the \code{XMLNode} object whose length is to be queried.}
}
\value{
An integer giving the number of sub-nodes
of this node.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlSize}}
\code{\link{xmlChildren}}
}
\examples{
doc <- xmlTreeParse(system.file("exampleData", "mtcars.xml", package="XML"))
r <- xmlRoot(doc, skip=TRUE)
length(r)
# get the last entry
r[[length(r)]]
}
\keyword{file}
XML/man/xmlToList.Rd 0000644 0001760 0000144 00000004524 11741563530 013755 0 ustar ripley users \name{xmlToList}
\alias{xmlToList}
\title{Convert an XML node/document to a more R-like list}
\description{
This function is an early and simple approach to converting
an XML node or document into a more typical R list containing
the data values directly (rather than as XML nodes).
It is useful for dealing with data that is returned from
REST requests or other Web queries or generally when parsing
XML and wanting to be able to access the content
as elements in a list indexed by the name of the node.
For example, if given a node of the form
\code{
<x>
<a>text</a>
<b foo="1"/>
<c bar="me">
<d>a phrase</d>
</c>
</x>
}
We would end up with a list with elements named "a", "b" and "c".
"a" would be the string "text", b would contain the named character
vector \code{c(foo = "1")} (i.e. the attributes) and "c" would
contain the list with two elements named "d" and ".attrs".
The element corresponding to "d" is a
character vector with the single element "a phrase".
The ".attrs" element of the list is the character vector of
attributes from the node \code{<c>...</c>}.
}
\usage{
xmlToList(node, addAttributes = TRUE, simplify = FALSE)
}
\arguments{
\item{node}{the XML node or document to be converted to an R list.
This can be an "internal" or C-level node (i.e. \code{\link{XMLInternalNode-class}})
or a regular R-level node (either \code{\link{XMLNode-class}} or \code{XMLHashNode}).}
\item{addAttributes}{a logical value which controls whether the attributes of an empty node
are added to the result.}
\item{simplify}{a logical value that controls whether we collapse
the list to a vector if the elements all have a common compatible
type. Basically, this controls whether we use \code{sapply} or \code{lapply}.
}
}
\value{
A list whose elements correspond to the children of the top-level nodes.
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{getNodeSet}} and \code{\link{xpathApply}}
\code{\link{xmlRoot}}, \code{\link{xmlChildren}}, \code{\link{xmlApply}}, \code{[[}, etc. for
accessing the content of XML nodes.
}
\examples{
tt =
'<x>
<a>text</a>
<b foo="1"/>
<c bar="me">
<d>a phrase</d>
</c>
</x>'
doc = xmlParse(tt)
xmlToList(doc)
# use an R-level node representation
doc = xmlTreeParse(tt)
xmlToList(doc)
}
\keyword{IO}
\keyword{data}
XML/man/XMLCodeFile-class.Rd 0000644 0001760 0000144 00000005705 12030432514 015143 0 ustar ripley users \name{XMLCodeFile-class}
%\Rdversion{1.1}
\docType{class}
\alias{XMLCodeFile-class}
\alias{XMLCodeDoc-class}
\alias{xmlCodeFile}
\alias{[[,XMLCodeFile-method}
%\alias{[[,XMLCodeFile,ANY,ANY-method}
\alias{[[,XMLCodeFile,ANY-method}
\alias{coerce,XMLCodeFile,XMLCodeDoc-method}
\alias{coerce,character,XMLCodeDoc-method}
\alias{coerce,character,XMLCodeFile-method}
\alias{source,XMLCodeFile-method}
\title{Simple classes for identifying an XML document containing R code}
\description{
These two classes allow the user to identify an XML document or file
as containing R code (amongst other content). Objects of either of these
classes can then be passed to \code{\link{source}} to read the
code into R and also used in \code{\link{xmlSource}} to read just parts of it.
\code{XMLCodeFile} represents the file by its name;
\code{XMLCodeDoc} parses the contents of the file when the R object is created.
Therefore, an \code{XMLCodeDoc} is a snapshot of the contents at a moment in time
while an \code{XMLCodeFile} object re-reads the file each time and so reflects
any "asynchronous" changes.
}
\section{Objects from the Class}{
One can create these objects using coercion methods, e.g.
\code{as("file/name", "XMLCodeFile")}
or \code{as("file/name", "XMLCodeDoc")}.
One can also use \code{xmlCodeFile}.
}
\section{Slots}{
\describe{
\item{\code{.Data}:}{Object of class \code{"character"}}
}
}
\section{Extends}{
Class \code{"\linkS4class{character}"}, from data part.
Class \code{"\linkS4class{vector}"}, by class "character", distance 2.
%Class \code{"\linkS4class{data.frameRowLabels}"}, by class "character", distance 2.
%Class \code{"\linkS4class{EnumerationValue}"}, by class "character", distance 2.
%Class \code{"\linkS4class{NodeIndex}"}, by class "character", distance 2.
%Class \code{"\linkS4class{RAnonymousFunctionOrCode}"}, by class "character", distance 2.
}
\section{Methods}{
\describe{
\item{[[}{\code{signature(x = "XMLCodeFile", i = "ANY", j = "ANY")}:
this method allows one to retrieve/access an individual R code element
in the XML document. This is typically done by specifying the value of the XML element's
"id" attribute.
}
\item{coerce}{\code{signature(from = "XMLCodeFile", to = "XMLCodeDoc")}:
parse the XML document from the "file" and treat the result as a
\code{XMLCodeDoc} object.
}
\item{source}{\code{signature(file = "XMLCodeFile")}: read and evaluate all the
R code in the XML document. For more control, use \code{\link{xmlSource}}.}
}
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlSource}}
}
\examples{
src = system.file("exampleData", "Rsource.xml", package = "XML")
# mark the string as an XML file containing R code
k = xmlCodeFile(src)
# read and parse the code, but don't evaluate it.
code = xmlSource(k, eval = FALSE)
# read and evaluate the code in a special environment.
e = new.env()
ans = xmlSource(k, envir = e)
ls(e)
}
\keyword{classes}
XML/man/addNode.Rd 0000644 0001760 0000144 00000003441 11741563530 013351 0 ustar ripley users \name{addNode}
\alias{addNode}
\alias{addNode.XMLHashTree}
\title{Add a node to a tree}
\description{
This generic function allows us to add a node to a tree
for different types of trees.
Currently it just works for XMLHashTree, but it could
be readily extended to the more general XMLFlatTree class.
However, the concept in this function is to change the tree
and return the node. This does not work unless the tree
is directly mutable without requiring reassignment,
i.e. the changes do not induce a new copy of the original tree object.
DOM trees which are lists of lists of lists do not fall into this category.
}
\usage{
addNode(node, parent, to, ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{node}{the node to be added as a child of the parent.}
\item{parent}{the parent node or identifier}
\item{to}{the tree object}
\item{\dots}{additional arguments that are understood by the different methods for the different types of
trees/nodes. These can include \code{attrs}, \code{namespace}, \code{namespaceDefinitions},
\code{.children}.}
}
\value{
The new node object.
For flat trees, this will be the \code{node} after it has been
coerced to be compatible with a flat tree, i.e. has an id and the
host tree added to it.
}
\references{\url{http://www.w3.org} }
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlHashTree}}
\code{\link{asXMLTreeNode}}
}
\examples{
tt = xmlHashTree()
top = addNode(xmlNode("top"), character(), tt)
addNode(xmlNode("a"), top, tt)
b = addNode(xmlNode("b"), top, tt)
c = addNode(xmlNode("c"), b, tt)
addNode(xmlNode("c"), top, tt)
addNode(xmlNode("c"), b, tt)
addNode(xmlTextNode("Some text"), c, tt)
xmlElementsByTagName(tt$top, "c")
tt
}
\keyword{IO}
\concept{tree}
XML/man/Doctype.Rd 0000644 0001760 0000144 00000003620 12160531304 013407 0 ustar ripley users \name{Doctype}
\alias{Doctype}
\alias{coerce,Doctype,character-method}
\title{Constructor for DTD reference}
\description{
This is a constructor for the \code{Doctype} class
that can be provided at the top of an XML document
to provide information about the class of document,
i.e. its DTD or schema.
Also, there is a method for converting such a \code{Doctype}
object to a character string.
}
\usage{
Doctype(system = character(), public = character(), name = "")
}
\arguments{
\item{system}{the system URI that locates the DTD. }
\item{public}{the identifier for locating the DTD in a catalog, for
example. This should be a character vector of length 2, giving
the public identifier and a URI. If just the public identifier
is given and a string is given for \code{system} argument,
the \code{system} value is used as the second element of
\code{public}.
The public identifier should be of the form
\code{+//creator//name//language}
where the first element is either + or -, and
the language is described by a code in the ISO 639 document.
}
\item{name}{the name of the root element in the document.
This should be the first parameter, but is left this way
for backward compatibility.}
}
\value{
An object of class \code{Doctype}.
}
\references{\url{http://www.w3.org/XML}
XML Elements of Style, Simon St. Laurent.
}
\author{ Duncan Temple Lang }
\seealso{
\code{\link{saveXML}}
}
\examples{
d = Doctype(name = "section",
public = c("-//OASIS//DTD DocBook XML V4.2//EN",
"http://oasis-open.org/docbook/xml/4.2/docbookx.dtd"))
as(d, "character")
# this call switches the system to the URI associated with the PUBLIC element.
d = Doctype(name = "section",
public = c("-//OASIS//DTD DocBook XML V4.2//EN"),
system = "http://oasis-open.org/docbook/xml/4.2/docbookx.dtd")
}
\keyword{IO}
XML/man/xmlHandler.Rd 0000644 0001760 0000144 00000002124 12665242441 014107 0 ustar ripley users \name{xmlHandler}
\alias{xmlHandler}
\title{ Example XML Event Parser Handler Functions}
\description{
A closure containing simple functions for the different
types of events potentially called by the \link{xmlEventParse},
and some tag-specific functions to illustrate how one can
add functions for specific DTDs and XML element types.
Contains a local \link{list} which can be mutated
by invocations of the closure's function.
}
\usage{
xmlHandler()
}
\value{
List containing the functions enumerated
in the closure definition along with the
\link{list}.
}
\author{Duncan Temple Lang}
\note{This is just an example.}
\seealso{\link{xmlEventParse}, \link{xmlTreeParse}}
\examples{
\dontrun{
xmlURL <- "http://www.omegahat.net/Scripts/Data/mtcars.xml"
xmlText <- paste(scan(xmlURL, what="", sep="\n"),"\n",collapse="\n")
}
xmlURL <- system.file("exampleData", "mtcars.xml", package="XML")
xmlText <- paste(readLines(xmlURL), "\n", collapse="")
xmlEventParse(xmlText, handlers = NULL, asText=TRUE)
xmlEventParse(xmlText, xmlHandler(), useTagName=TRUE, asText=TRUE)
}
\keyword{file}
\keyword{IO}
XML/man/supportsExpat.Rd 0000644 0001760 0000144 00000002331 12665242441 014712 0 ustar ripley users \name{supportsExpat}
\alias{supportsExpat}
\alias{supportsLibxml}
\title{ Determines which native XML parsers are being used.}
\description{
Use of the Gnome libxml and Expat parsers is supported
in this R/S XML package, but both need not be used when
compiling the package. These functions determine whether
each is available in the underlying native code.
}
\usage{
supportsExpat()
supportsLibxml()
}
\details{
One might want to use different parsers to test
validity of a document in different ways and
to get different error messages. Additionally,
one parser may be more efficient than the other.
These methods allow one to write code in such
a way that one parser is preferred and is used
if it is available, but the other is used
if the first is not available.
}
\value{
Returns \code{TRUE} if the corresponding library
has been linked into the package.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlEventParse}}
}
\examples{
# use Expat if possible, otherwise libxml
fileName <- system.file("exampleData", "mtcars.xml", package="XML")
xmlEventParse(fileName, useExpat = supportsExpat())
}
\keyword{file}
XML/man/readSolrDoc.Rd 0000644 0001760 0000144 00000002165 12122116423 014203 0 ustar ripley users \name{readSolrDoc}
\alias{readSolrDoc}
\alias{readSolrDoc,XMLInternalDocument-method}
\alias{readSolrDoc,XMLInternalNode-method}
\alias{readSolrDoc,character-method}
\alias{readSolrDoc,AsIs-method}
\title{Read the data from a Solr document}
\description{
Solr documents are used to represent
general data in a reasonably simple format
made up of lists, integers, logicals, longs,
doubles, dates, etc. each with an optional name.
These correspond very naturally to R objects.
}
\usage{
readSolrDoc(doc, ...)
}
\arguments{
\item{doc}{the object containing the data. This can be the name of a
file, a parsed XML document or an XML node.}
\item{\dots}{additional parameters for the methods.}
}
\value{
An R object representing the data in the Solr document,
typically a named vector or named list.
}
\references{
Lucene text search system.
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{readKeyValueDB}},
\code{\link{xmlToList}},
\code{\link{xmlToDataFrame}},
\code{\link{xmlParse}}
}
\examples{
f = system.file("exampleData", "solr.xml", package = "XML")
readSolrDoc(f)
}
\keyword{IO}
\concept{Solr}
XML/man/xmlGetAttr.Rd 0000644 0001760 0000144 00000005472 12665242441 014115 0 ustar ripley users \name{xmlGetAttr}
\alias{xmlGetAttr}
\title{Get the value of an attribute in an XML node}
\description{
This is a convenience function that retrieves
the value of a named attribute in an XML node,
taking care of checking for its existence.
It also allows the caller to provide a default
value to use as the return value if the
attribute is not present.
}
\usage{
xmlGetAttr(node, name, default = NULL, converter = NULL,
namespaceDefinition = character(),
addNamespace = length(grep(":", name)) > 0)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{node}{the XML node}
\item{name}{the name of the attribute}
\item{default}{a value to use as the default return if the attribute
is not present in the XML node. }
\item{converter}{an optional function which if supplied is invoked
with the attribute value and the value returned.
This can be used to convert the string to an arbitrary
value which is useful if it is, for example, a number.
This is only called if the attribute exists within the node.
In other words, it is not applied to the \code{default} value.}
\item{namespaceDefinition}{a named character vector giving
name space prefixes and URIs to use when resolving
the attribute with a namespace.
The values are used to compare the name space prefix used in
the \code{name} given by the user to the name space
definition in the node to ensure they match.
This is important as we might ask for an attribute named
\code{r:width} assuming that the prefix \code{r} corresponded to the
URI \code{http://www.r-project.org}. However, there may
be a name space prefix \code{r} defined on the node that points
to a different URI and so this would be an erroneous match.
}
\item{addNamespace}{a logical value that indicates whether we should put the
namespace prefix on the resulting name.
This is passed on to \code{\link{xmlAttrs}} and so controls whether the resulting
attribute names have the prefix attached.
So one specifies \code{TRUE} for this argument if the attribute identifier
has a namespace prefix.
}
}
\details{
This just checks that the attribute list is
non-NULL and that there is an element with
the specified name.
}
\value{
If the
attribute is present,
the return value is a string which is the value of the attribute.
Otherwise, the value of \code{default} is returned.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlAttrs}}
}
\examples{
node <- xmlNode("foo", attrs=c(a="1", b="my name"))
xmlGetAttr(node, "a")
xmlGetAttr(node, "doesn't exist", "My own default value")
xmlGetAttr(node, "b", "Just in case")
}
\keyword{file}
XML/man/xmlParseDoc.Rd 0000644 0001760 0000144 00000005451 12665242441 014240 0 ustar ripley users \name{xmlParseDoc}
\Rdversion{1.1}
\alias{xmlParseDoc}
\alias{COMPACT}
\alias{DTDATTR}
\alias{DTDLOAD}
\alias{DTDVALID}
\alias{HUGE}
\alias{NOBASEFIX}
\alias{NOBLANKS}
\alias{NOCDATA}
\alias{NODICT}
\alias{NOENT}
\alias{NOERROR}
\alias{NONET}
\alias{NOWARNING}
\alias{NOXINCNODE}
\alias{NSCLEAN}
\alias{OLDSAX}
\alias{PEDANTIC}
\alias{RECOVER}
\alias{XINCLUDE}
\alias{OLD10}
\alias{SAX1}
\title{Parse an XML document with options controlling the parser.}
\description{
This function is a generalization of \code{\link{xmlParse}}
that parses an XML document. With this function, we can specify
a combination of different options that control the operation of the
parser. The options control many different aspects of the parsing process.
}
\usage{
xmlParseDoc(file, options = 1L, encoding = character(),
asText = !file.exists(file), baseURL = file)
}
\arguments{
\item{file}{the name of the file or URL or the XML content itself}
\item{options}{options controlling the behavior of the parser.
One specifies the different options as elements of an integer
vector. These are then bitwise OR'ed together. The possible options are
\code{RECOVER}, \code{NOENT}, \code{DTDLOAD},
\code{DTDATTR}, \code{DTDVALID}, \code{NOERROR}, \code{NOWARNING},
\code{PEDANTIC}, \code{NOBLANKS}, \code{SAX1}, \code{XINCLUDE},
\code{NONET}, \code{NODICT}, \code{NSCLEAN}, \code{NOCDATA},
\code{NOXINCNODE}, \code{COMPACT}, \code{OLD10}, \code{NOBASEFIX},
\code{HUGE}, \code{OLDSAX}.
( These options are also listed in the (non-exported) variable
\code{parserOptions}.)
}
\item{encoding}{character string that provides the encoding of the
document if it is not explicitly contained within the document itself.}
\item{asText}{a logical value indicating whether \code{file} is the
XML content (\code{TRUE}) or the name of a file or URL (\code{FALSE})}
\item{baseURL}{the base URL used for resolving relative documents,
e.g. XIncludes. This is important if \code{file} is the actual XML
content rather than a URL}
}
\value{
An object of class \code{XMLInternalDocument}.
}
\references{libxml2}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlParse}}
}
\examples{
f = system.file("exampleData", "mtcars.xml", package="XML")
# Same as xmlParse()
xmlParseDoc(f)
txt =
'
'
xmlParseDoc(txt, NSCLEAN, asText = TRUE)
txt =
'
'
xmlParseDoc(txt, c(NSCLEAN, NOERROR), asText = TRUE)
}
\keyword{data}
\concept{XML}
XML/man/SAXMethods.Rd 0000644 0001760 0000144 00000004471 11741563530 013776 0 ustar ripley users \name{startElement.SAX}
\alias{startElement.SAX}
\alias{endElement.SAX}
\alias{text.SAX}
\alias{comment.SAX}
\alias{processingInstruction.SAX}
\alias{entityDeclaration.SAX}
\alias{.InitSAXMethods}
\alias{text.SAX,ANY,SAXState-method}
\alias{comment.SAX,ANY,SAXState-method}
\alias{endElement.SAX,ANY,SAXState-method}
\alias{startElement.SAX,ANY,ANY,SAXState-method}
\alias{processingInstruction.SAX,ANY,ANY,SAXState-method}
\alias{entityDeclaration.SAX,ANY,ANY,ANY,ANY,ANY,SAXState-method}
\title{Generic Methods for SAX callbacks}
\description{
This is a collection of generic functions
for which one can write methods
so that they are called in response to
different SAX events.
The idea is that one defines methods for different
classes of the \code{.state} argument
and dispatch to different methods based on that
argument.
The functions represent the different SAX events.
}
\usage{
startElement.SAX(name, atts, .state = NULL)
endElement.SAX(name, .state = NULL)
comment.SAX(content, .state = NULL)
processingInstruction.SAX(target, content, .state = NULL)
text.SAX(content, .state = NULL)
entityDeclaration.SAX(name, base, sysId, publicId, notationName, .state = NULL)
.InitSAXMethods(where = "package:XML")
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{name}{the name of the XML element or entity being declared}
\item{atts}{named character vector of XML attributes}
\item{content}{the value/string in the processing instruction or comment}
\item{target}{the target of the processing instruction, e.g. the R in
\code{<?R ...?>}}
\item{base}{x}
\item{sysId}{the system identifier for this entity}
\item{publicId}{the public identifier for the entity}
\item{notationName}{name of the notation specification}
\item{.state}{the state object on which the user-defined methods
should dispatch.}
\item{where}{the package in which the class and method definitions
should be defined. This is almost always unspecified.}
}
\value{
Each method should return the (potentially modified)
state value.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.xmlsoft.org}}
\author{Duncan Temple Lang}
\note{ This no longer requires the Expat XML parser to be installed.
Instead, we use libxml's SAX parser.}
\seealso{
\code{\link{xmlEventParse}}
}
%\examples{}
\keyword{file}
XML/man/AssignXMLNode.Rd 0000644 0001760 0000144 00000002524 12665242441 014430 0 ustar ripley users \name{[<-.XMLNode}
\alias{[<-.XMLNode}
\alias{[[<-.XMLNode}
\title{Assign sub-nodes to an XML node}
\description{
These functions allow one to assign a sub-node
to an existing XML node by name or index.
These are the assignment equivalents of the
subsetting accessor functions.
They are typically called indirectly
via the assignment operator, such as
\code{x[["myTag"]] <- xmlNode("mySubTag")}.
}
\usage{
\method{[}{XMLNode}(x, i) <- value
\method{[[}{XMLNode}(x, i) <- value
}
\arguments{
\item{x}{the \code{XMLNode} object to which the sub-node is to be assigned.}
\item{i}{the identifier for the position in the list of children
of \code{x} into which the right-hand-side node(s) should be assigned.
These can be either numbers or names.}
\item{value}{one or more \code{XMLNode} objects which are to be the sub-nodes
of \code{x}.}
}
\value{
The XML node \code{x} containing the new or modified
nodes.
}
\references{\url{http://www.w3.org}, \url{http://www.omegahat.net/RSXML}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{[.XMLNode}}
\code{\link{[[.XMLNode}}
\code{\link{append.xmlNode}}
\code{\link{xmlSize}}
}
\examples{
top <- xmlNode("top", xmlNode("next","Some text"))
top[["second"]] <- xmlCDataNode("x <- 1:10")
top[[3]] <- xmlNode("tag",attrs=c(id="name"))
}
\keyword{IO}
\keyword{file}
XML/man/dtdElementValidEntry.Rd 0000644 0001760 0000144 00000003472 12665242441 016107 0 ustar ripley users \name{dtdElementValidEntry}
\alias{dtdElementValidEntry.character}
\alias{dtdElementValidEntry.XMLElementContent}
\alias{dtdElementValidEntry.XMLElementDef}
\alias{dtdElementValidEntry.XMLOrContent}
\alias{dtdElementValidEntry.XMLSequenceContent}
\alias{dtdElementValidEntry}
\title{Determines whether an XML element allows a particular type of sub-element.}
\description{
This tests whether \code{name} is a legitimate tag to use as a
direct sub-element of the \code{element} tag according to the
definition of the \code{element} element in the specified DTD. This
is a generic function that dispatches on the element type, so that
different versions take effect for \code{XMLSequenceContent},
\code{XMLOrContent}, \code{XMLElementContent}.
}
\usage{
dtdElementValidEntry(element, name, pos=NULL)
}
\arguments{
\item{element}{The \code{XMLElementDef} defining the tag
in which we are asking whether the sub-element can be used. }
\item{name}{The name of the sub-element about which we are
querying the list of sub-tags within \code{element}.
}
\item{pos}{An optional argument which, if supplied,
queries whether the \code{name} sub-element is valid
as the \code{pos}-th child of \code{element}.
}
}
\details{
This is not intended to be called directly, but
indirectly by the
\code{\link{dtdValidElement}} function.
}
\value{
Logical value indicating whether the sub-element
can appear in an \code{element} tag or not.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{parseDTD}},
\code{\link{dtdValidElement}},
\code{\link{dtdElement}}
}
\examples{
dtdFile <- system.file("exampleData", "foo.dtd",package="XML")
dtd <- parseDTD(dtdFile)
dtdElementValidEntry(dtdElement("variables",dtd), "variable")
}
\keyword{file}
XML/man/setXMLNamespace.Rd 0000644 0001760 0000144 00000002542 12030410326 014767 0 ustar ripley users \name{setXMLNamespace}
\alias{setXMLNamespace}
\title{Set the name space on a node}
\description{
This function sets the name space for an XML node, typically
an internal node. We can use it to either define a new namespace
and use that, or refer to a name space definition in an ancestor
of the current node.
}
\usage{
setXMLNamespace(node, namespace, append = FALSE)
}
\arguments{
\item{node}{the node on which the name space is to be set}
\item{namespace}{the name space to use for the node. This can be a
name space prefix (string) defined in an ancestor node, or a named
character vector of the form \code{c(prefix = URI)} that defines a
new namespace on this node, or we can use a name space object
created with \code{\link{newXMLNamespace}}.}
\item{append}{currently ignored.}
}
\value{
An object of class \code{XMLNamespaceRef} which is a reference to the
native/internal/C-level name space object.
}
%\references{}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{newXMLNamespace}}
\code{\link{removeXMLNamespaces}}
}
\examples{
# define a new namespace
e = newXMLNode("foo")
setXMLNamespace(e, c("r" = "http://www.r-project.org"))
# use an existing namespace on an ancestor node
e = newXMLNode("top", namespaceDefinitions = c("r" = "http://www.r-project.org"))
setXMLNamespace(e, "r")
e
}
\keyword{programming}
XML/man/genericSAXHandlers.Rd 0000644 0001760 0000144 00000004073 12665242441 015467 0 ustar ripley users \name{genericSAXHandlers}
\alias{genericSAXHandlers}
\title{SAX generic callback handler list}
\description{
This is a convenience function to get the collection
of generic functions that make up the callbacks
for the SAX parser.
The return value can be used directly
as the value of the \code{handlers}
argument in \code{\link{xmlEventParse}}.
One can easily specify a subset
of the handlers by giving the names of
the elements to include or exclude.
}
\usage{
genericSAXHandlers(include, exclude, useDotNames = FALSE)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{include}{if supplied, this gives the names of the subset of elements to
return.
}
\item{exclude}{if supplied (and \code{include} is not),
this gives the names of the elements to remove from the list of
functions.
}
\item{useDotNames}{ a logical value.
If this is \code{TRUE}, the names of the elements in the list of
handler functions are prefixed with '.'. This is the newer format
used to differentiate general element handlers and node-name-specific handlers.}
}
\value{
A list of functions.
By default, the elements are named
startElement, endElement, comment, text,
processingInstruction, entityDeclaration
and contain the corresponding
generic SAX callback function, i.e. given by
the element name with the .SAX suffix.
If \code{include} or \code{exclude} is specified,
a subset of this list is returned.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlEventParse}}
\code{\link{startElement.SAX}}
\code{\link{endElement.SAX}}
\code{\link{comment.SAX}}
\code{\link{processingInstruction.SAX}}
\code{\link{entityDeclaration.SAX}}
\code{\link{.InitSAXMethods}}
}
\examples{
\testonly{
# .InitSAXMethods()
names(genericSAXHandlers())
names(genericSAXHandlers(inc=c("startElement", "endElement", "text")))
names(genericSAXHandlers(ex=c("startElement", "endElement", "text")))
}
}
\keyword{file}
XML/man/getXMLErrors.Rd 0000644 0001760 0000144 00000002637 12665242441 014357 0 ustar ripley users \name{getXMLErrors}
\alias{getXMLErrors}
\title{Get XML/HTML document parse errors}
\description{
This function is intended to be a convenience for
finding all the errors in an XML or HTML document due
to being malformed, i.e. missing quotes on attributes,
non-terminated elements/nodes, incorrectly terminated
nodes, missing entities, etc.
The document is parsed and a list of the errors is returned
along with information about the file, line and column number.
}
\usage{
getXMLErrors(filename, parse = xmlParse, ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{filename}{ the identifier for the document to be parsed, one of
a local file name, a URL or the XML/HTML content itself}
\item{parse}{ the function to use to parse the document, usually
either \code{\link{xmlTreeParse}} or \code{\link{htmlTreeParse}}.
}
\item{\dots}{additional arguments passed to the function given by \code{parse}}
}
\value{
A list of S3-style \code{XMLError} objects.
}
\references{libxml2 (\url{http://xmlsoft.org})}
\author{Duncan Temple Lang}
\seealso{
\code{error} argument for \code{\link{xmlTreeParse}} and related functions.
}
\examples{
# Get the "errors" in the HTML that was generated from this Rd file
getXMLErrors(system.file("html", "getXMLErrors.html", package = "XML"))
\dontrun{
getXMLErrors("http://www.omegahat.net/index.html")
}
}
\keyword{IO}
\keyword{programming}
XML/man/addSibling.Rd 0000644 0001760 0000144 00000005726 11741563530 014063 0 ustar ripley users
\name{getSibling}
\alias{getSibling}
\alias{addSibling}
\title{Manipulate sibling XML nodes}
\description{
These functions allow us to both access the sibling node
to the left or right of a given node and so walk the chain
of siblings, and also to insert a new sibling
}
\usage{
getSibling(node, after = TRUE, ...)
addSibling(node, ..., kids = list(...), after = NA)
}
\arguments{
\item{node}{the internal XML node (XMLInternalNode)
whose siblings are of interest}
\item{\dots}{the XML nodes to add as siblings or children to node.}
\item{kids}{a list containing the XML nodes to add as siblings.
This is equivalent to ... but used when we already have the
nodes in a list rather than as individual objects. This is used in programmatic
calls to
\code{addSibling}
rather than interactive use where we more commonly have
the individual node objects.
}
\item{after}{a logical value indicating whether to retrieve or add the
nodes to the right (\code{TRUE}) or to the left (\code{FALSE}) of this sibling.
}
}
\value{
\code{getSibling}
returns an object of class
XMLInternalNode (or some derived S3 class, e.g. XMLInternalTextNode)
\code{addSibling}
returns a list whose elements are the newly added
XML (internal) nodes.
}
\seealso{
\code{\link{xmlChildren}},
\code{\link{addChildren}}
\code{\link{removeNodes}}
\code{\link{replaceNodes}}
}
\examples{
# Reading Apple's iTunes files
#
# Here we read a "censored" "database" of songs from Apple's iTunes application
# which is stored in a property list. The format is quite generic and
# the fields for each song are given in the form
#
# <key>Artist</key><string>Person's name</string>
#
# So to find the names of the artists for all the songs, we want to
# find all the Artist nodes and then get their next sibling
# which has the actual value.
#
# More information can be found in .
#
fileName = system.file("exampleData", "iTunes.plist", package = "XML")
doc = xmlParse(fileName)
nodes = getNodeSet(doc, "//key[text() = 'Artist']")
sapply(nodes, function(x) xmlValue(getSibling(x)))
f = system.file("exampleData", "simple.xml", package = "XML")
tt = as(xmlParse(f), "XMLHashTree")
tt
e = getSibling(xmlRoot(tt)[[1]])
# and back to the first one again by going backwards along the sibling list.
getSibling(e, after = FALSE)
# This also works for multiple top-level "root" nodes
f = system.file("exampleData", "job.xml", package = "XML")
tt = as(xmlParse(f), "XMLHashTree")
x = xmlRoot(tt, skip = FALSE)
getSibling(x)
getSibling(getSibling(x), after = FALSE)
}
\keyword{IO}
XML/man/xmlHashTree.Rd 0000644 0001760 0000144 00000011673 13610030344 014232 0 ustar ripley users \name{xmlHashTree}
\alias{xmlHashTree}
\title{Constructors for trees stored as flat list of nodes with
information about parents and children.}
\description{
These (and related internal) functions allow us to represent trees as
a simple, non-hierarchical collection of nodes along with
corresponding tables that identify the parent and child relationships.
This is different from representing a tree as a list of lists of lists
... in which each node has a list of its own children. In a
functional language like R, it is not possible then for the children
to be able to identify their parents.
We use an environment to represent these flat trees. Since these are
mutable without requiring the change to be reassigned, we can modify a
part of the tree locally without having to reassign the top-level
object.
We can use either a list (with names) to store the nodes or a hash
table/associative array that uses names. There is a non-trivial
performance difference.
}
\usage{
xmlHashTree(nodes = list(), parents = character(), children = list(),
env = new.env(TRUE, parent = emptyenv()))
}
\arguments{
\item{nodes}{ a collection of existing nodes that are to be added to
the tree. These are used to initialize the tree. If this is
specified, you must also specify \code{children} and \code{parents}.
}
\item{parents}{ the parent relationships for the nodes given by \code{nodes}.}
\item{children}{the children relationships for the nodes given by \code{nodes}.}
\item{env}{an environment in which the information for the tree will
be stored. This is essentially the tree object as it allows us to
modify parts of the tree without having to reassign the top-level
object. Unlike most R data types, environments are mutable.
}
}
\value{
An \code{xmlHashTree} object has an accessor method via
\code{$} for accessing individual nodes within the tree.
One can use the node name/identifier in an expression such as
\code{tt$myNode} to obtain the element.
The name of a node is either its XML node name or if that is already
present in the tree, a machine generated name.
One can find the names of all the nodes using the
\code{objects} function since these trees are regular
environments in R.
Using the \code{all = TRUE} argument, one can also find the
\dQuote{hidden} elements that define the tree's structure.
These are \code{.children} and \code{.parents}.
The former is a (hashed) environment. Each element is identified by the
identifier of the corresponding node in the tree (corresponding to the
name of the node in the tree's environment).
The value of that element is simply a character vector giving the
identifiers of all of the children of that node.
The \code{.parents} element is also an environment.
Each element in this gives the pair of node and parent identifiers
with the parent identifier being the value of the variable in the
environment. In other words, we look up the parent of a node
named 'kid' by retrieving the value of the variable 'kid' in the
\code{.parents} environment of this hash tree.
The function \code{.addNode} is used to insert a new node into the
tree.
The structure of this tree allows one to easily traverse all nodes,
navigate up the tree from a node via its parent. Certain tasks are
more complex as the hierarchy is not implicit within a node.
}
\references{\url{http://www.w3.org/XML}}
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{xmlTree}}
\code{\link{xmlOutputBuffer}}
\code{\link{xmlOutputDOM}}
}
\examples{
f = system.file("exampleData", "dataframe.xml", package = "XML")
tr = xmlHashTree()
xmlTreeParse(f, handlers = list(.startElement = tr[[".addNode"]]))
tr # print the tree on the screen
# Get the two child nodes of the dataframe node.
xmlChildren(tr$dataframe)
# Find the names of all the nodes.
objects(tr)
# Which nodes have children
objects(tr$.children)
# Which nodes are leaves, i.e. do not have children
setdiff(objects(tr), objects(tr$.children))
# find the class of each of these leaf nodes.
sapply(setdiff(objects(tr), objects(tr$.children)),
function(id) class(tr[[id]]))
# distribution of number of children
sapply(tr$.children, length)
# Get the first A node
tr$A
# Get its parent node.
xmlParent(tr$A)
f = system.file("exampleData", "allNodeTypes.xml", package = "XML")
# Convert the document
r = xmlInternalTreeParse(f, xinclude = TRUE)
ht = as(r, "XMLHashTree")
ht
# work on the root node, or any node actually
as(xmlRoot(r), "XMLHashTree")
# Example of making copies of an XMLHashTreeNode object to create a separate tree.
f = system.file("exampleData", "simple.xml", package = "XML")
tt = as(xmlParse(f), "XMLHashTree")
xmlRoot(tt)[[1]]
xmlRoot(tt)[[1, copy = TRUE]]
table(unlist(eapply(tt, xmlName)))
# if any of the nodes had any attributes
# table(unlist(eapply(tt, xmlAttrs)))
}
\keyword{IO}
\concept{XML}
XML/man/addChildren.Rd 0000644 0001760 0000144 00000021407 12665242441 014217 0 ustar ripley users \name{addChildren}
\alias{addChildren}
\alias{xmlParent<-}
\alias{removeChildren}
\alias{removeNodes}
\alias{removeNodes.list}
\alias{removeNodes.XMLNodeSet}
\alias{removeNodes.XMLNodeList}
\alias{removeNodes.XMLInternalNode}
\alias{replaceNodes}
\alias{addAttributes}
\alias{removeAttributes}
\alias{addChildren,XMLInternalNode-method}
\alias{addChildren,XMLNode-method}
\alias{addAttributes,XMLInternalElementNode-method}
\alias{addAttributes,XMLNode-method}
\alias{removeAttributes,XMLInternalElementNode-method}
\alias{removeAttributes,XMLNode-method}
\title{Add child nodes to an XML node}
\description{
This collection of functions
allow us to add, remove and replace children from an XML node
and also to add and remove attributes on an XML node.
These are generic functions that work on
both internal C-level \code{XMLInternalElementNode} objects
and regular R-level \code{XMLNode} objects.
\code{addChildren} is similar to \code{\link{addNode}}
and the two may be consolidated into a single generic
function and methods in the future.
}
\usage{
addChildren(node, ..., kids = list(...), at = NA, cdata = FALSE, append = TRUE)
removeChildren(node, ..., kids = list(...), free = FALSE)
removeNodes(node, free = rep(FALSE, length(node)))
replaceNodes(oldNode, newNode, ...)
addAttributes(node, ..., .attrs = NULL,
suppressNamespaceWarning = getOption("suppressXMLNamespaceWarning", FALSE),
append = TRUE)
removeAttributes(node, ..., .attrs = NULL, .namespace = FALSE,
.all = (length(list(...)) + length(.attrs)) == 0)
%xmlParent(node) = value
}
\arguments{
\item{node}{the XML node whose state is to be modified,
i.e. to which the child nodes are to be added or whose attribute list
is to be changed.}
\item{\dots}{This is for use in interactive settings when specifying a collection of
values individually. In programming contexts when one obtains the
collection as a vector or list from another call, use the
\code{kids} or \code{.attrs} parameter.
}
\item{kids}{when adding children to a node, this is a list of
children nodes which should be of
the same "type" (i.e. internal or R-level nodes)
as the \code{node} argument. However, they can also be
regular strings in which case they are converted to
XML text nodes.
For \code{removeChildren}, this is again a list
which identifies the child nodes to be
removed using
the integer identifier of the child, or
the name of the XML node (but this will only remove the first such
node and not necessarily do what you expect when there are
multiple nodes with the same name),
or the \code{XMLInternalNode} object itself.
}
\item{at}{if specified, an integer identifying
the position in the original
list of children at which the new children should be added.
The children are added after that child.
This can also be a vector of indices which is as long
as the number of children being added and specifies the position
for each child being added. If the vector is shorter than the
number of children being added, it is padded with NAs
and so the corresponding children are added at the end of the
list.
This parameter is only implemented for internal nodes at present.
}
\item{cdata}{a logical value which controls whether children that
are specified as strings/text are enclosed within a CDATA node
when converted to actual nodes. This value is passed on to the
relevant function that creates the text nodes, e.g.
\code{\link{xmlTextNode}} and \code{\link{newXMLTextNode}}.
}
\item{.attrs}{a character vector identifying the names of the
attributes. These strings can have name space prefixes,
e.g. \code{r:length}
and the namespaces will be resolved relative to the
list supported by \code{node} to ensure those namespaces are defined.
}
\item{.namespace}{This is currently ignored and may never be
supported.
The intent is to identify on which set of attributes the operation is
to perform - the name space declarations or the regular
node attributes.
This is a logical value indicating
if \code{TRUE} that the attributes of interest are name space declarations,
i.e. of the form \code{xmlns:prefix} or \code{xmlns}.
If a value of \code{FALSE} is supplied this indicates that we
are identifying regular attributes.
Note that we can still identify attributes with a name space
prefix as, e.g., \code{ns:attr} without this value
}
\item{free}{a logical value indicating whether to free the C-level
memory associated with the child nodes that were removed.
\code{TRUE} means to free that memory.
This is only applicable for the internal nodes created
with \code{xmlTree} and \code{newXMLNode} and related functions.
It is necessary as automated garbage collection is tricky in this
tree-based context spanning both R and C data structures and
memory managers.
}
\item{.all}{a logical value indicating whether to remove all of the
attributes within the XML node without having to specify them by
name.}
\item{oldNode}{the node which is to be replaced}
\item{newNode}{the node which is to take the place of
\code{oldNode} in the list of children of the parent of
\code{oldNode}}
\item{suppressNamespaceWarning}{a logical value or a character string.
This is used to control the situation when an XML node
or attribute is created with a name space prefix that currently has no
definition for that node.
This is not necessarily an error but can lead to one.
This argument controls whether a warning is issued
or if a separate function is called.
A value of \code{FALSE} means not to suppress the warning and
so it is issued. A value of \code{TRUE} causes the potential
problem to be ignored assuming that the namespace will be added
to this node or one of its ancestors at a later point.
And if this value is a character string, we search for a
function of that name and invoke it.
}
\item{append}{a logical value that indicates whether (\code{TRUE}) the specified
attributes or children should be added to the existing attributes on the XML node
(if any exist), or, if \code{FALSE} these should replace any existing attributes.}
}
%\details{}
\value{
Each of these functions returns the modified node.
For an internal node, this is the same R object and
only the C-level data structures have changed.
For an R \code{XMLNode} object, this is an entirely
separate object from the original node.
It must be inserted back into its parent "node" or context if the changes are to be
seen in that wider context.
}
\references{
libxml2 \url{http://www.xmlsoft.org}
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlTree}}
\code{\link{newXMLNode}}
}
\examples{
b = newXMLNode("bob",
namespace = c(r = "http://www.r-project.org",
omg = "http://www.omegahat.net"))
cat(saveXML(b), "\n")
addAttributes(b, a = 1, b = "xyz", "r:version" = "2.4.1", "omg:len" = 3)
cat(saveXML(b), "\n")
removeAttributes(b, "a", "r:version")
cat(saveXML(b), "\n")
removeAttributes(b, .attrs = names(xmlAttrs(b)))
addChildren(b, newXMLNode("el", "Red", "Blue", "Green",
attrs = c(lang ="en")))
k = lapply(letters, newXMLNode)
addChildren(b, kids = k)
cat(saveXML(b), "\n")
removeChildren(b, "a", "b", "c", "z")
# can mix numbers and names
removeChildren(b, 2, "e") # d and e
cat(saveXML(b), "\n")
i = xmlChildren(b)[[5]]
xmlName(i)
# have the identifiers
removeChildren(b, kids = c("m", "n", "q"))
x <- xmlNode("a",
xmlNode("b", "1"),
xmlNode("c", "1"),
"some basic text")
v = removeChildren(x, "b")
# remove c and b
v = removeChildren(x, "c", "b")
# remove the text and "c" leaving just b
v = removeChildren(x, 3, "c")
\dontrun{
# this won't work as the 10 gets coerced to a
# character vector element to be combined with 'w'
# and there is no node name 10.
removeChildren(b, kids = c(10, "w"))
}
# for R-level nodes (not internal)
z = xmlNode("arg", attrs = c(default="TRUE"),
xmlNode("name", "foo"), xmlNode("defaultValue","1:10"))
o = addChildren(z,
"some text",
xmlNode("a", "a link",
attrs = c(href = "http://www.omegahat.net/RSXML")))
o
# removing nodes
doc = xmlParse("bob")
top = xmlRoot(doc)
top
removeNodes(list(top[[1]], top[[3]]))
# a and c have disappeared.
top
}
\keyword{IO }
\keyword{programming}
\concept{XML}
\concept{document tree}
XML/man/xmlDOMApply.Rd 0000644 0001760 0000144 00000004300 12665242441 014155 0 ustar ripley users \name{xmlDOMApply}
\alias{xmlDOMApply}
\title{Apply function to nodes in an XML tree/DOM.}
\description{
This recursively applies the specified function to each node in an
XML tree, creating a new tree,
parallel to the original input tree.
Each element in the new tree is the return
value obtained from invoking the specified function
on the corresponding element
of the original tree.
The order in which the function is recursively applied
is "bottom-up". In other words,
the function is first applied to each of the child
nodes and then to the parent node
containing the newly computed results for the children.
}
\usage{
xmlDOMApply(dom, func)
}
\arguments{
\item{dom}{a node in the XML tree or DOM on which to recursively
apply the given function.
This should not be the \code{XMLDocument}
itself returned from
\code{\link{xmlTreeParse}}
but an object of class \code{XMLNode}.
This is typically obtained by
calling \code{\link{xmlRoot}} on the
return value from \code{\link{xmlTreeParse}}.
}
\item{func}{
the function to be applied to each node in the XML tree.
This is passed the node object and the return
value is inserted into the new tree that is to be returned
in the corresponding position as the node being processed.
If the return value is \code{NULL}, this node is dropped from the tree.}
}
\details{
This is a native (C code) implementation that
understands the structure of an XML DOM returned
from \code{\link{xmlTreeParse}} and iterates
over the nodes in that tree.
}
\value{
A tree that parallels the structure in the
\code{dom} object passed to it.
}
\author{Duncan Temple Lang}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\seealso{\link{xmlTreeParse}}
\examples{
dom <- xmlTreeParse(system.file("exampleData","mtcars.xml", package="XML"))
tagNames <- function() {
tags <- character(0)
add <- function(x) {
if(inherits(x, "XMLNode")) {
if(is.na(match(xmlName(x), tags)))
tags <<- c(tags, xmlName(x))
}
NULL
}
return(list(add=add, tagNames = function() {return(tags)}))
}
h <- tagNames()
xmlDOMApply(xmlRoot(dom), h$add)
h$tagNames()
}
\keyword{file}
XML/man/xmlParserContextFunction.Rd 0000644 0001760 0000144 00000004006 12030432636 017033 0 ustar ripley users \name{xmlParserContextFunction}
\alias{xmlParserContextFunction}
\title{Identifies function as expecting an xmlParserContext argument}
\description{
This is a convenience function for setting the class of the
specified function to include \code{"XMLParserContextFunction"}.
This identifies it as expecting an
\code{xmlParserCtxt} object as its first argument.
The resulting function can be passed to the
internal/native XML parser as a handler/callback function.
When the parser calls it, it recognizes this class information
and includes a reference to the C-level \code{xmlParserCtxt}
object as the first argument in the call.
This \code{xmlParserCtxt} object can be used to gracefully
terminate the parsing (without an error),
and in the future will also provide access to details
about the current state of the parser,
e.g. the encoding of the file, the XML version,
whether entities are being replaced,
line and column number for each node processed.
}
\usage{
xmlParserContextFunction(f, class = "XMLParserContextFunction")
}
\arguments{
\item{f}{the function whose class information is to be augmented.}
\item{class}{the name of the class which is to be added to the \code{class}
attribute of the function.}
}
\value{
The function object \code{f} whose class attribute has been prepended
with the value of \code{class}.
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlInternalTreeParse}}/\code{\link{xmlParse}}
and the \code{branches} parameter of \code{\link{xmlEventParse}}.
}
\examples{
fun = function(context, ...) {
# do things to parse the node
# using the context if necessary.
cat("In XMLParserContextFunction\n")
xmlStopParser(context)
}
fun = xmlParserContextFunction(fun)
txt = ""
# doesn't work for xmlTreeParse()
# xmlTreeParse(txt, handlers = list(a = fun))
# but does in xmlEventParse().
xmlEventParse(txt, handlers = list(startElement = fun), asText = TRUE)
}
\keyword{IO}
\keyword{programming}
\concept{XML}
XML/man/xmlChildren.Rd 0000644 0001760 0000144 00000003345 11741563530 014267 0 ustar ripley users \name{xmlChildren}
\alias{xmlChildren}
\alias{xmlChildren<-}
\alias{xmlChildren.XMLNode}
\alias{xmlChildren.XMLInternalNode}
\alias{xmlChildren.XMLInternalDocument}
\alias{xmlChildren<-,XMLInternalNode-method}
\alias{xmlChildren<-,ANY-method}
\title{ Gets the sub-nodes within an XMLNode object. }
\description{
These functions provide access to the children of the given
XML node.
The simple accessor returns a list of child XMLNode objects within an
XMLNode object.
The assignment operator (\code{xmlChildren<-}) sets the
children of the node to the given value and returns the
updated/modified node. No checking is currently done
on the type and values of the right hand side. This allows
the children of the node to be arbitrary R objects. This can
be useful but means that one cannot rely on any structure in a node
being present.
}
\usage{
xmlChildren(x, addNames= TRUE, ...)
}
\arguments{
\item{x}{an object of class XMLNode.}
\item{addNames}{a logical value indicating whether to add the XML
names of the nodes as names of the R list.
This is only relevant for XMLInternalNode objects as XMLNode objects
in R already have R-level names.
}
\item{\dots}{additional arguments for the particular methods,
e.g. \code{omitTypes} for an XMLInternalNode.}
}
\value{
A list whose elements are sub-nodes of the user-specified
XMLNode. These are also of class XMLNode.
}
\references{\url{http://www.w3.org/XML}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlChildren}},\code{\link{xmlSize}},
\code{\link{xmlTreeParse}}
}
\examples{
fileName <- system.file("exampleData", "mtcars.xml", package="XML")
doc <- xmlTreeParse(fileName)
names(xmlChildren(doc$doc$children[["dataset"]]))
}
\keyword{file}
XML/man/parseURI.Rd 0000644 0001760 0000144 00000003340 12665242441 013504 0 ustar ripley users \name{parseURI}
\alias{parseURI}
\alias{URI-class}
\alias{coerce,URI,character-method}
\title{Parse a URI string into its elements}
\description{
This breaks a URI given as a string into its different elements such as
protocol/scheme, host, port, file name, query.
This information can be used, for example, when constructing URIs
relative to a base URI.
The return value is an S3-style object of class \code{URI}.
This function uses libxml routines to perform the parsing.
}
\usage{
parseURI(uri)
}
\arguments{
\item{uri}{a single string}
}
\value{
A list with 8 elements
\item{scheme}{the name of the protocol being used, http, ftp as a string.}
\item{authority}{a string representing a rarely used aspect of URIs}
\item{server}{a string identifying the host, e.g. www.omegahat.net}
\item{user}{a string giving the name of the user, e.g. in FTP
"ftp://duncan@www.omegahat.net", this would yield "duncan"}
\item{path}{a string identifying the path of the target file}
\item{query}{the CGI query part of the string, e.g.
the bit after '?' of the form \code{name=value&name=value}}
\item{fragment}{a string giving the fragment identifier, i.e. the part of the URI after '#' such as an anchor name}
\item{port}{an integer identifying the port number on which the
connection is to be made}
}
\seealso{
\code{\link{getRelativeURL}}
}
\examples{
parseURI("http://www.omegahat.net:8080/RCurl/index.html")
parseURI("ftp://duncan@www.omegahat.net:8080/RCurl/index.html")
parseURI("ftp://duncan@www.omegahat.net:8080/RCurl/index.html#my_anchor")
as(parseURI("http://duncan@www.omegahat.net:8080/RCurl/index.html#my_anchor"), "character")
as(parseURI("ftp://duncan@www.omegahat.net:8080/RCurl/index.html?foo=1&bar=axd"), "character")
}
\keyword{IO}
\concept{URI}
\concept{Web}
XML/man/getRelativeURL.Rd 0000644 0001760 0000144 00000004430 13610036261 014641 0 ustar ripley users \name{getRelativeURL}
\alias{getRelativeURL}
\title{Compute name of URL relative to a base URL}
\description{
This function is a convenience function for
computing the fully qualified URI of a document
relative to a base URL.
It handles the case where the document is
already fully qualified and so ignores the base URL
or, alternatively, is a relative document name and so
prepends the base URL.
It does not (yet) try to be clever by collapsing
relative directories such as "..".
}
\usage{
getRelativeURL(u, baseURL, sep = "/", addBase = TRUE,
simplify = TRUE, escapeQuery = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{u}{the location of the target document whose fully qualified URI is to be determined.}
\item{baseURL}{the base URL relative to which the value of \code{u} should be interpreted.}
\item{sep}{the separator to use to separate elements of the path. For external URLs (e.g.
accessed via HTTP, HTTPS, FTP), / should be used. For local files on Windows machines
one might use \code{.Platform$file.sep}, but this is incorrect unless one knows that the
resulting file is to be accessed using Windows file system notation, i.e.
\code{C:\\\\my\\\\folder\\\\file}.
}
\item{addBase}{a logical controlling whether we prepend the base URL
to the result.}
\item{simplify}{a logical value that controls whether we attempt to
simplify/normalize the path to remove \code{..} and \code{.}}
\item{escapeQuery}{a logical value. Currently ignored.}
}
\details{
This uses the function \code{parseURI} to compute the components
of the different URIs.
}
\value{
A character string giving the fully qualified URI for
\code{u}.
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{parseURI}} which uses the libxml2 facilities for parsing URIs.
\code{\link{xmlParse}}, \code{\link{xmlTreeParse}}, \code{\link{xmlInternalTreeParse}}.
XInclude and XML Schema import/include elements for computing relative locations of included/imported files.
}
\examples{
getRelativeURL("http://www.omegahat.net", "http://www.r-project.org")
getRelativeURL("bar.html", "http://www.r-project.org/")
getRelativeURL("../bar.html", "http://www.r-project.org/")
}
\keyword{IO}
\keyword{programming}
\concept{XML}
XML/man/getXIncludes.Rd 0000644 0001760 0000144 00000005244 13610036304 014403 0 ustar ripley users \name{getXIncludes}
\alias{getXIncludes}
\alias{xmlXIncludes}
\title{Find the documents that are XInclude'd in an XML document}
\description{
The \code{getXIncludes} function finds the names of the documents
that are XIncluded in a given XML document,
optionally processing these documents recursively.
\code{xmlXIncludes} returns the hierarchy of included documents.
}
\usage{
getXIncludes(filename, recursive = TRUE, skip = character(),
omitPattern = "\\\\.(js|html?|txt|R|c)$",
namespace = c(xi = "http://www.w3.org/2003/XInclude"),
duplicated = TRUE)
xmlXIncludes(filename, recursive = TRUE,
omitPattern = "\\\\.(js|html?|txt|R|c)$",
namespace = c(xi = "http://www.w3.org/2003/XInclude"),
addNames = TRUE,
clean = NULL, ignoreTextParse = FALSE)
}
\arguments{
\item{filename}{the name of the XML document's URL or file or the
parsed document itself.}
\item{recursive}{a logical value controlling whether to recursively
process the XInclude'd files for their XInclude'd files}
\item{skip}{a character vector of file names to ignore or skip over}
\item{omitPattern}{a regular expression for identifying files that
are included that we do not want to recursively process}
\item{namespace}{the namespace to use for the XInclude. There are two
that are in use: 2001 and 2003.}
\item{duplicated}{a logical value that controls whether only the
unique names of the files are returned, or if we get all references
to all files.}
\item{addNames}{a logical that controls whether we add the name of
the parent file as the names vector for the collection of included
file names. This is useful, but sometimes we want to disable this,
e.g. to create a \code{JSON} representation of the hierarchy for use
in, e.g., D3.}
\item{clean}{how to process the names of the files. This can be a
function or a character vector of two regular expressions passed to
\code{gsub}.
The function is called with a vector of file names. The regular
expressions are used in a call to \code{gsub}.}
\item{ignoreTextParse}{if \code{TRUE}, ignore the XIncluded files that
are identified as text and not XML with \code{parse="text"}.}
}
\value{
If \code{recursive} is \code{FALSE}, a character vector giving the
names of the included files.
If \code{recursive} is \code{TRUE}, currently the same character
vector form. However, this will be a hierarchical list.
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{getHTMLExternalFiles}}
}
\examples{
f = system.file("exampleData", "xinclude", "a.xml", package = "XML")
getXIncludes(f, recursive = FALSE)
}
\keyword{IO}
\concept{XML}
XML/man/xmlEventParse.Rd 0000644 0001760 0000144 00000052603 13610036771 014613 0 ustar ripley users \name{xmlEventParse}
\alias{xmlEventParse}
\title{ XML Event/Callback element-wise Parser}
\description{
This is the event-driven or SAX (Simple API for XML)
style parser which processes XML without building the tree
but rather identifies tokens in the stream of characters
and passes them to handlers which can make sense of them
in context.
This reads and processes the contents of an XML file or string by
invoking user-level functions associated with different
components of the XML tree. These components include
the beginning and end of XML elements, e.g.
\code{<myTag>}
and \code{</myTag>} respectively,
comments, CDATA (escaped character data), entities, processing
instructions, etc.
This allows the caller to create the appropriate data structure from the
XML document contents rather than the default tree (see
\link{xmlTreeParse})
and so avoids having the entire document in memory.
This is important for large documents and where we would end up with
essentially 2 copies of the data in memory at once, i.e
the tree and the R data structure containing the information taken
from the tree.
When dealing with classes of XML documents whose instances could be large,
this approach is desirable but a little more cumbersome to program
than the standard DOM (Document Object Model) approach provided
by \code{xmlTreeParse}.
Note that \code{xmlTreeParse} does allow a hybrid style of
processing that allows us to apply handlers to nodes in the tree
as they are being converted to R objects. This is a style of
event-driven or asynchronous calling.
In addition to the generic token event handlers such as
"begin an XML element" (the \code{startElement} handler), one can
also provide handler functions for specific tags/elements such
as \code{<myTag>} with handler elements with the same name as the
XML element of interest, i.e. \code{"myTag" = function(x, attrs)}.
When the event parser is reading text nodes,
it may call the text handler function with different
sub-strings of the text within the node.
Essentially, the parser collects up n characters into a buffer and
passes this as a single string to the text handler and then continues
collecting more text until the buffer is full or there is no more text.
It passes each sub-string to the text handler.
If \code{trim} is \code{TRUE}, it removes leading and trailing white
space from the substring before calling the text handler. If the
resulting text is empty and \code{ignoreBlanks} is \code{TRUE},
then we don't bother calling the text handler function.
So the key thing to remember about dealing with text is that the
entire text of a node may come in multiple separate calls
to the text handler. A common idiom is to have the text handler
concatenate the values it is passed in separate calls and
to have the end element handler process the entire text and reset
the text variable to be empty.
}
\usage{
xmlEventParse(file, handlers = xmlEventHandler(),
ignoreBlanks = FALSE, addContext=TRUE,
useTagName = TRUE, asText = FALSE, trim=TRUE,
useExpat=FALSE, isURL = FALSE,
state = NULL, replaceEntities = TRUE, validate = FALSE,
saxVersion = 1, branches = NULL,
useDotNames = length(grep("^\\\\.", names(handlers))) > 0,
error = xmlErrorCumulator(), addFinalizer = NA,
encoding = character())
}
\arguments{
\item{file}{the source of the XML content.
This can be a string giving the name of a file or remote URL,
the XML itself, a connection object, or a function.
If this is a string, and \code{asText} is \code{TRUE},
the value is the XML content.
This allows one to read the content separately from parsing
without having to write it to a file.
If \code{asText} is \code{FALSE} and a string is passed
for \code{file}, this is taken as the name of a
file or remote URI. If one is using the libxml parser (i.e. not expat),
this can be a URI accessed via HTTP or FTP or a compressed local file.
If it is the name of a local file,
it can include \code{~}, environment variables, etc. which will be expanded by R.
(Note this is not the case in S-Plus, as far as I know.)
If a connection is given, the parser incrementally reads one line at
a time by calling the function \code{\link[base]{readLines}} with
the connection as the first argument (and \code{1} as the number of
lines to read). The parser calls this function each time it needs
more input.
If invoking the \code{readLines} function to get each line is
excessively slow or is inappropriate, one can provide a function as the value
of \code{file}. Again, when the XML parser needs more content
to process, it invokes this function to get a string.
This function is called with a single argument, the maximum size
of the string that can be returned.
The function is responsible for accessing the correct connection(s),
etc. which is typically done via lexical scoping/environments.
This mechanism allows the user to control how the XML content
is retrieved in very general ways. For example, one might
read from a set of files, starting one when the contents
of the previous file have been consumed. This allows for the
use of hybrid connection objects.
Support for connections and functions in this form is only
provided if one is using libxml2 and not libxml version 1.
}
\item{handlers}{ a closure object that contains functions which will be invoked
as the XML components in the document are encountered by the parser.
The standard function or handler names are
\code{startElement()}, \code{endElement()}
\code{comment()}, \code{getEntity},
\code{entityDeclaration()}, \code{processingInstruction()},
\code{text()}, \code{cdata()},
\code{startDocument()}, and \code{endDocument()},
or alternatively and preferably,
these names prefixed with a '.',
i.e. .startElement, .comment, ...
The call signature for the entityDeclaration function was changed in
version 1.7-0. Note that in earlier versions, the C routine did not
invoke any R function and so no code will actually break.
Also, we have renamed \code{externalEntity} to \code{getEntity}.
These were based on the expat parser.
The new signature is
\code{c(name = "character",
type = "integer",
content = "",
system = "character",
public = "character"
)}
\code{name} gives the name of the entity being
defined.
The \code{type} identifies
the type of the entity using the value
of a C-level enumerated constant used in libxml2,
but also gives the human-readable form
as the name of the single element in the integer vector.
The possible values are
\code{"Internal_General"},
\code{"External_General_Parsed"},
\code{"External_General_Unparsed"}, \code{"Internal_Parameter"},
\code{"External_Parameter"}, \code{"Internal_Predefined"}.
If we are dealing with an internal entity,
the content will be the string containing
the value of the entity.
If we are dealing with an external entity,
then \code{content} will be a character vector of length
0, i.e. empty.
Instead, either or both of the system and public
arguments will be non-empty and identify the
location of the external content.
\code{system} will be a string containing a URI, if non-empty,
and \code{public} corresponds to the PUBLIC identifier used
to identify content using an SGML-like approach.
The use of PUBLIC identifiers is less common.
}
\item{ignoreBlanks}{a logical value indicating whether
text elements made up entirely of white space should be included
in the resulting `tree'. }
\item{addContext}{ logical value indicating whether the callback functions
in `handlers' should be invoked with contextual information about
the parser and the position in the tree, such as node depth,
path indices for the node relative to the root, etc.
If this is \code{TRUE}, each callback function should support
\dots.
}
\item{useTagName}{ a logical value.
If this is \code{TRUE}, when the SAX parser signals an event for the
start of an XML element, it will first look for an element in the
list of handler functions whose name matches (exactly) the name of
the XML element. If such an element is found, that function is
invoked. Otherwise, the generic \code{startElement} handler function
is invoked. The benefit of this is that the author of the handler
functions can write node-specific handlers for the different element
names in a document and not have to establish a mechanism to invoke
these functions within the \code{startElement} function. This is done
by the XML package directly.
If the value is \code{FALSE}, then the \code{startElement} handler
function will be called without any effort to find a node-specific
handler. If there are no node-specific handlers, specifying
\code{FALSE} for this parameter will make the computations very
slightly faster.
}
\item{asText}{logical value indicating that the first argument,
`file',
should be treated as the XML text to parse, not the name of
a file. This allows the contents of documents to be retrieved
from different sources (e.g. HTTP servers, XML-RPC, etc.) and still
use this parser.}
\item{trim}{
whether to strip white space from the beginning and end of text strings.
}
% \item{restartCounter}{}
\item{useExpat}{
a logical value indicating whether to use the expat SAX parser,
or to default to the libxml.
If this is TRUE, the library must have been compiled with support for expat.
See \link{supportsExpat}.
}
\item{isURL}{
indicates whether the \code{file} argument refers to a URL
(accessible via ftp or http) or a regular file on the system.
If \code{asText} is TRUE, this should not be specified.
}
\item{state}{an optional S object that is passed to the
callbacks and can be modified to communicate state between
the callbacks. If this is given, the callbacks should accept
an argument named \code{.state} and it should return an object
that will be used as the updated value of this state object.
The new value can be any S object and will be passed to the next
callback where again it will be updated by that function's return
value, and so on.
If this is not specified in the call to \code{xmlEventParse},
no \code{.state} argument is passed to the callbacks. This makes the
interface compatible with previous releases.
}
\item{replaceEntities}{
logical value indicating whether to substitute entity references
with their text directly. This should be left as False.
The text still appears as the value of the node, but there
is more information about its source, allowing the parse to be reversed
with full reference information.
}
\item{saxVersion}{an integer value which should be either 1 or 2.
This specifies which SAX interface to use in the C code.
The essential difference is the number of arguments passed to the
\code{startElement} handler function(s). Under SAX 2, in addition to the name of
the element and the named-attributes vector, two additional arguments
are provided.
The first identifies the namespace of the element.
This is a named character vector of length 1,
with the value being the URI of the namespace and the name
being the prefix that identifies that namespace within the document.
For example, \code{xmlns:r="http://www.r-project.org"}
would be passed as \code{c(r = "http://www.r-project.org")}.
If there is no prefix because the namespace is being used as the
default, the result of calling \code{\link[base]{names}} on
the string is \code{""}.
The second additional argument (the fourth in total) gives the collection of all the namespaces
defined within this element.
Again, this is a named character vector.
}
\item{validate}{
Currently, this has no effect as the libxml2 parser uses a
document structure to do validation.
a logical indicating whether to use a validating parser or not, or in other words
check the contents against the DTD specification. If this is true, warning
messages will be displayed about errors in the DTD and/or document, but the parsing
will proceed except for the presence of terminal errors.
}
\item{branches}{a named list of functions.
Each element identifies an XML element name.
If an XML element of that name is encountered in
the SAX stream, the stream is processed until the
end of that element and an internal node (see
\code{\link{xmlTreeParse}} and its \code{useInternalNodes} parameter)
is created. The function in our branches list corresponding to this
XML element is then invoked with the (internal) node as the only
argument.
This allows one to use the DOM model on a sub-tree of the entire
document and thus use both SAX and DOM together to get the
efficiency of SAX and the simpler programming model of DOM.
Note that the branches mechanism works top-down and does not
work for nested tags. If one specifies an element name in the
\code{branches} argument, e.g. myNode, and
there is a nested myNode instance within a branch, the branches
handler will not be called for that nested instance.
If there is an instance where this is problematic, please
contact the maintainer of this package.
One can cause the parser to collect a branch without identifying
the node within the \code{branches} list. Specifically, within
a regular start-element handler, one can return a function
whose class is \code{SAXBranchFunction}.
The SAX parser recognizes this and collects up the branch
starting at the current node being processed and when it is
complete, invokes this function.
This allows us to dynamically determine which nodes to treat as
branches rather than just matching names. This is necessary when
a node name has different meanings in different parts of the XML
hierarchy, e.g. dict in an iTunes song list.
See the file \code{itunesSax2.R} in the examples for an example of this.
This is a two step process. In the future, we might make it so that
the R function handling the start-element event could directly
collect the branch and continue its operations without having
to call another function asynchronously.
}
\item{useDotNames}{a logical value
indicating whether to use the
newer format for identifying general element function handlers
with the '.' prefix, e.g. .text, .comment, .startElement.
If this is \code{FALSE}, then the older format
text, comment, startElement, ...
are used. This causes problems when there are indeed nodes
named text or comment or startElement as a
node-specific handler are confused with the corresponding
general handler of the same name. Using \code{TRUE}
means that your list of handlers should have names that use
the '.' prefix for these general element handlers.
This is the preferred way to write new code.
}
\item{error}{a function that is called when an XML error is encountered.
This is called with 6 arguments and is described in \code{\link{xmlTreeParse}}.
}
\item{addFinalizer}{a logical value or identifier for a C routine
that controls whether we register finalizers on the internal node.}
\item{encoding}{ a character string (scalar) giving the encoding for the
document. This is optional as the document should contain its own
encoding information. However, if it doesn't, the caller can specify
this for the parser.
}
}
\details{
This is now implemented using the libxml parser.
Originally, this was implemented via the Expat XML parser by
Jim Clark (\url{http://www.jclark.com}).
}
\value{
The return value is the `handlers'
argument. It is assumed that this is a closure and that
the callback functions have manipulated variables
local to it and that the caller knows how to extract this.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml}}
\author{Duncan Temple Lang}
\note{
The libxml parser can read URLs via http or ftp.
It does not require the support of \code{wget} as used
in other parts of \R, but uses its own facilities
to connect to remote servers.
The idea for the hybrid SAX/DOM mode where we consume tokens in the
stream to create an entire node for a sub-tree of the document was
first suggested to me by Seth Falcon at the Fred Hutchinson Cancer
Research Center. It is similar to the XML::Twig module in Perl
by Michel Rodriguez.
}
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{xmlStopParser}}
XMLParserContextFunction
}
\examples{
fileName <- system.file("exampleData", "mtcars.xml", package="XML")
# Print the name of each XML tag encountered at the beginning of each
# tag.
# Uses the libxml SAX parser.
xmlEventParse(fileName,
list(startElement=function(name, attrs){
cat(name,"\n")
}),
useTagName=FALSE, addContext = FALSE)
\dontrun{
# Parse the text rather than a file or URL by reading the URL's contents
# and making it a single string. Then call xmlEventParse
xmlURL <- "http://www.omegahat.net/Scripts/Data/mtcars.xml"
xmlText <- paste(scan(xmlURL, what="",sep="\n"),"\n",collapse="\n")
xmlEventParse(xmlText, asText=TRUE)
}
# Using a state object to share mutable data across callbacks
f <- system.file("exampleData", "gnumeric.xml", package = "XML")
zz <- xmlEventParse(f,
handlers = list(startElement=function(name, atts, .state) {
.state = .state + 1
print(.state)
.state
}), state = 0)
print(zz)
# Illustrate the startDocument and endDocument handlers.
xmlEventParse(fileName,
handlers = list(startDocument = function() {
cat("Starting document\n")
},
endDocument = function() {
cat("ending document\n")
}),
saxVersion = 2)
if(libxmlVersion()$major >= 2) {
startElement = function(x, ...) cat(x, "\n")
xmlEventParse(ff <- file(f), handlers = list(startElement = startElement))
close(ff)
# Parse with a function providing the input as needed.
xmlConnection =
function(con) {
if(is.character(con))
con = file(con, "r")
if(isOpen(con, "r"))
open(con, "r")
function(len) {
if(len < 0) {
close(con)
return(character(0))
}
x = character(0)
tmp = ""
while(length(tmp) > 0 && nchar(tmp) == 0) {
tmp = readLines(con, 1)
if(length(tmp) == 0)
break
if(nchar(tmp) == 0)
x = append(x, "\n")
else
x = tmp
}
if(length(tmp) == 0)
return(tmp)
x = paste(x, collapse="")
x
}
}
\donttest{## this leaves a connection open
## xmlConnection would need amending to return the connection.
ff = xmlConnection(f)
xmlEventParse(ff, handlers = list(startElement = startElement))
}
# Parse from a connection. Each time the parser needs more input, it
# calls readLines(<connection>, 1)
xmlEventParse(ff <-file(f), handlers = list(startElement = startElement))
close(ff)
# using SAX 2
h = list(startElement = function(name, attrs, namespace, allNamespaces){
cat("Starting", name,"\n")
if(length(attrs))
print(attrs)
print(namespace)
print(allNamespaces)
},
endElement = function(name, uri) {
cat("Finishing", name, "\n")
})
xmlEventParse(system.file("exampleData", "namespaces.xml", package="XML"),
handlers = h, saxVersion = 2)
# This example is not very realistic but illustrates how to use the
# branches argument. It forces the creation of complete nodes for
# elements named <b> and extracts the id attribute.
# This could be done directly on the startElement, but this just
# illustrates the mechanism.
filename = system.file("exampleData", "branch.xml", package="XML")
b.counter = function() {
nodes <- character()
f = function(node) { nodes <<- c(nodes, xmlGetAttr(node, "id"))}
list(b = f, nodes = function() nodes)
}
b = b.counter()
invisible(xmlEventParse(filename, branches = b["b"]))
b$nodes()
filename = system.file("exampleData", "branch.xml", package="XML")
invisible(xmlEventParse(filename, branches = list(b = function(node) {
print(names(node))})))
invisible(xmlEventParse(filename, branches = list(b = function(node) {
print(xmlName(xmlChildren(node)[[1]]))})))
}
############################################
# Stopping the parser mid-way and an example of using XMLParserContextFunction.
startElement =
function(ctxt, name, attrs, ...) {
print(ctxt)
print(name)
if(name == "rewriteURI") {
cat("Terminating parser\n")
xmlStopParser(ctxt)
}
}
class(startElement) = "XMLParserContextFunction"
endElement =
function(name, ...)
cat("ending", name, "\n")
fileName = system.file("exampleData", "catalog.xml", package = "XML")
xmlEventParse(fileName, handlers = list(startElement = startElement,
endElement = endElement))
}
\keyword{file}
\keyword{IO}
XML/man/coerce.Rd 0000644 0001760 0000144 00000001421 12030407024 013232 0 ustar ripley users \name{coerceNodes}
\alias{coerce,XMLHashTreeNode,XMLHashTree-method}
\alias{coerce,XMLInternalDocument,XMLHashTree-method}
\alias{coerce,XMLInternalNode,XMLHashTree-method}
\alias{coerce,XMLNode,XMLInternalNode-method}
\alias{coerce,XMLAbstractDocument,XMLAbstractNode-method}
\title{Transform between XML representations}
\description{
This collection of coercion methods (i.e. \code{as(obj, "type")})
allows users of the \code{XML} package to switch between different
representations of XML nodes and to map from an XML document to
the root node and from a node to the document.
This helps to manage the nodes.
}
%\usage{}
\value{
An object of the target type.
}
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{xmlParse}}
}
\keyword{IO}
\keyword{programming}
\concept{XML}
XML/man/xmlParent.Rd 0000644 0001760 0000144 00000007145 11741563530 013772 0 ustar ripley users \name{xmlParent}
\alias{xmlParent}
\alias{xmlAncestors}
\alias{xmlParent.XMLInternalNode}
\alias{xmlParent,XMLInternalNode-method}
\alias{xmlParent,XMLHashTreeNode-method}
\alias{xmlParent,XMLTreeNode-method}
\title{Get parent node of XMLInternalNode or ancestor nodes}
\description{
\code{xmlParent} operates on an XML node
and returns a reference to its parent node
within the document tree.
This works for an internal, C-level
\code{XMLInternalNode} object
created, for example, using \code{\link{newXMLNode}}
and related functions or \code{\link{xmlTree}}
or from \code{\link{xmlTreeParse}} with the
\code{useInternalNodes} parameter.
It is possible to find the parent of an R-level
XML node when using a tree
created with, for example, \code{\link{xmlHashTree}}
as the parent information is stored separately.
\code{xmlAncestors} walks the chain of parents to the
top of the document and either returns a list of those
nodes, or alternatively a list of the values obtained
by applying a function to each of the nodes.
}
\usage{
xmlParent(x, ...)
xmlAncestors(x, fun = NULL, ..., addFinalizer = NA, count = -1L)
}
\arguments{
\item{x}{an object of class \code{XMLInternalNode} whose parent is being requested. }
\item{fun}{an R function which is invoked for each node as we walk up
the tree.}
\item{\dots}{any additional arguments that are passed in calls to
\code{fun} after the node object and for \code{xmlParent} this allows methods to define their
own additional parameters.}
\item{addFinalizer}{a logical value indicating whether the
default finalizer routine should be registered to
free the internal xmlDoc when R no longer has a reference to this
external pointer object.
This can also be the name of a C routine or a reference
to a C routine retrieved using
\code{\link[base]{getNativeSymbolInfo}}.
}
\item{count}{an integer that indicates how many levels of the hierarchy
to traverse. This allows us to get the \code{count} most recent
ancestors of the node.}
}
\details{
This uses the internal libxml structures to access the parent in the DOM tree.
This function is generic so that we can add methods for other types of nodes
if we so want in the future.
}
\value{
\code{xmlParent} returns an object of class \code{XMLInternalNode}.
If \code{fun} is \code{NULL}, \code{xmlAncestors} returns a list of the nodes in order of
top-most node or root of the tree, then its child, then the child of
that child, etc. This is the reverse order in which the nodes are
visited/found.
If \code{fun} is a function, \code{xmlAncestors} returns a list
whose elements are the results of calling that function for
each node. Again, the order is top down.
}
\references{\url{http://www.w3.org/XML}}
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlChildren}}
\code{\link{xmlTreeParse}}
\code{\link{xmlNode}}
}
\examples{
top = newXMLNode("doc")
s = newXMLNode("section", attr = c(title = "Introduction"))
a = newXMLNode("article", s)
addChildren(top, a)
xmlName(xmlParent(s))
xmlName(xmlParent(xmlParent(s)))
# Find the root node.
root = a
while(!is.null(xmlParent(root)))
root = xmlParent(root)
# find the names of the parent nodes of each 'h' node.
# use a global variable to "simplify" things and not use a closure.
filename = system.file("exampleData", "branch.xml", package = "XML")
parentNames <- character()
xmlParse(filename,
handlers =
list(h = function(x) {
parentNames <<- c(parentNames, xmlName(xmlParent(x)))
}))
table(parentNames)
}
\keyword{file}
\keyword{IO}
XML/man/append.XMLNode.Rd 0000644 0001760 0000144 00000003677 12665242441 014543 0 ustar ripley users \name{append.xmlNode}
\alias{append.xmlNode}
\alias{append.XMLNode}
\title{Add children to an XML node}
\description{
This appends one or more XML nodes as children of an existing node.
}
\usage{
append.XMLNode(to, ...)
append.xmlNode(to, ...)
}
\arguments{
\item{to}{the XML node to which the sub-nodes are to be added.}
\item{\dots}{the sub-nodes which are to be added to the \code{to} node.
If this is a \code{list} of \code{XMLNode} objects (e.g. created by a call to
\code{\link{lapply}}), then that list is used.}
}
\value{
The original \code{to} node containing its new children nodes.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{[<-.XMLNode}}
\code{\link{[[<-.XMLNode}}
\code{\link{[.XMLNode}}
\code{\link{[[.XMLNode}}
}
\examples{
# Create a very simple representation of a simple dataset.
# This is just an example. The result is
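# <data numVars="2" numRecords="3">
# <varNames>
# <string>
# A
# </string>
# <string>
# B
# </string>
# </varNames>
# <record>
# 1.2 3.5
# </record>
# <record>
# 20.2 13.9
# </record>
# <record>
# 10.1 5.67
# </record>
# </data>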
n = xmlNode("data", attrs = c("numVars" = 2, numRecords = 3))
n = append.xmlNode(n, xmlNode("varNames", xmlNode("string", "A"), xmlNode("string", "B")))
n = append.xmlNode(n, xmlNode("record", "1.2 3.5"))
n = append.xmlNode(n, xmlNode("record", "20.2 13.9"))
n = append.xmlNode(n, xmlNode("record", "10.1 5.67"))
print(n)
\dontrun{
tmp <- lapply(references, function(i) {
if(!inherits(i, "XMLNode"))
i <- xmlNode("reference", i)
i
})
r <- xmlNode("references")
r[["references"]] <- append.xmlNode(r[["references"]], tmp)
}
}
\keyword{file}
\keyword{IO}
XML/man/isXMLString.Rd 0000644 0001760 0000144 00000004160 12030205013 014152 0 ustar ripley users \name{isXMLString}
\Rdversion{1.1}
\alias{isXMLString}
\alias{xml}
\alias{xmlParseString}
\alias{XMLString-class}
\title{Facilities for working with XML strings}
\description{
These functions and classes are used to represent and parse a
string whose content is known to be XML.
\code{xml} allows us to mark a character vector as containing XML,
i.e. of class \code{XMLString}.
\code{xmlParseString} is a convenience routine for converting an
XML string into an XML node/tree.
\code{isXMLString} examines a string's content and heuristically
determines whether it is XML.
}
\usage{
isXMLString(str)
xmlParseString(content, doc = NULL, namespaces = RXMLNamespaces,
clean = TRUE, addFinalizer = NA)
xml(x)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{str,x,content}{the string containing the XML material.}
\item{doc}{if specified, an \code{XMLInternalDocument} object which is
used to "house" the new nodes. Specifically, when the nodes are
created, they are made as part of this document. This may not be as
relevant now with the garbage collection being done at a node and
document level. But it is still potentially of some value.}
\item{namespaces}{a character vector giving the URIs for the XML namespaces which are to be removed if \code{clean} is \code{TRUE}.}
\item{clean}{a logical value that controls whether namespaces are removed after the document is parsed.}
\item{addFinalizer}{a logical value or identifier for a C routine
that controls whether we register finalizers on the internal node.}
}
%\details{}
\value{
\code{isXMLString} returns a logical value.
\code{xmlParseString} returns an object of class
\code{XMLInternalElementNode}.
\code{xml} returns an object of class \code{XMLString}
identifying the text as \code{XML}.
}
%\references{}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlParse}}
\code{\link{xmlTreeParse}}
}
\examples{
isXMLString("a regular string < 20 characters long")
isXMLString("c")
xmlParseString("c")
# We can lie!
isXMLString(xml("foo"))
}
\keyword{IO}
\concept{XML}
XML/man/catalogs.Rd 0000644 0001760 0000144 00000011473 12665242441 013615 0 ustar ripley users
\name{catalogLoad}
\alias{catalogLoad}
\alias{catalogClearTable}
\alias{catalogAdd}
\alias{catalogDump}
\title{Manipulate XML catalog contents}
\description{
These functions allow the R user to programmatically control the
XML catalog table used in the XML parsing tools in the
C-level libxml2 library and hence in R packages that use these, e.g.
the XML and Sxslt packages.
Catalogs are consulted whenever an external document needs to be loaded.
XML catalogs allow one to influence how such a document is loaded
by mapping document identifiers to
alternative locations, for example to refer to locally
available versions.
They support mapping URI prefixes to local file directories/files,
resolving both SYSTEM and PUBLIC identifiers used in DOCTYPE declarations at the
top of an XML/HTML document, and delegating resolution to other catalog files.
Catalogs are written using an XML format.
Catalogs allow resources used in XInclude nodes and XSL templates
to refer to generic network URLs and have these be mapped to local files
and so avoid potentially slow network retrieval.
Catalog files are written in XML
We might have a catalog file that contains the XML
In the XDynDocs package, we refer to OmegahatXSL files and
DocBook XSL files have a catalog file of the form
The functions provided here allow the R programmer to
empty the current contents of the global catalog table and so
start from scratch (
\code{catalogClearTable}
),
load the contents of a catalog file into the global catalog table (
\code{catalogLoad}
),
and to add individual entries programmatically without the need for a catalog table.
In addition to controlling the catalogs via these functions, we can
use \code{\link{catalogResolve}} to use the catalog
to resolve the name of a resource and map it to a local resource.
\code{catalogDump} allows us to retrieve an XML document representing the current
contents of the in-memory catalog.
More information can be found at
\url{http://xmlsoft.org/catalog.html}
and \url{http://www.sagehill.net/docbookxsl/Catalogs.html}
among many resources and the specification for the catalog format at
\url{http://www.oasis-open.org/committees/entity/spec-2001-08-06.html}.
}
\usage{
catalogLoad(fileNames)
catalogClearTable()
catalogAdd(orig, replace, type = "rewriteURI")
catalogDump(fileName = tempfile(), asText = TRUE)
}
\arguments{
\item{orig}{a character vector of identifiers, e.g. URIs, that are to be mapped to a different
name via the catalog.
This can be a named character vector where the names are the original URIs and the values are the
corresponding rewritten values.
}
\item{replace}{a character vector of the rewritten or resolved values for the
identifiers given in orig. Often this is omitted and the
original-rewrite pairs are given as a named vector via orig.
}
\item{type}{a character vector with the same length as
orig (or recycled to have the same length) which specifies
the type of the resources in the elements of orig.
Valid values are rewriteURI, rewriteSystem, system, public.
}
\item{fileNames}{a character vector giving the names of the
catalog files to load.}
\item{fileName}{the name of the file in which to place the contents of the current catalog}
\item{asText}{a logical value which indicates whether to write the catalog
as a character string if \code{fileName} is not specified.}
}
\value{
These functions are used for their side effects on the global
catalog table maintained in C by libxml2. Their return values are
logical values/vectors indicating whether the particular operation
was successful or not.
}
\references{
This provides an R-like interface to a small subset of the catalog API
made available in libxml2.
}
\seealso{
\code{\link{catalogResolve}}
XInclude, XSL and import/include directives.
In addition to these functions, there is an un-exported, undocumented
function named \code{catalogDump} that can be used to
get the contents of the (first) catalog table.
}
\examples{
# Add a rewrite rule
#
#
catalogAdd(c("http://www.omegahat.net/XML" = system.file("XML", package
= "XML")))
catalogAdd("http://www.omegahat.net/XML", system.file("XML", package =
"XML"))
catalogAdd("http://www.r-project.org/doc/",
paste(R.home(), "doc", "", sep = .Platform$file.sep))
#
# This shows how we can load a catalog and then resolve a
# system identifier that it maps.
#
catalogLoad(system.file("exampleData", "catalog.xml", package = "XML"))
catalogResolve("docbook4.4.dtd", "system")
catalogResolve("-//OASIS//DTD DocBook XML V4.4//EN", "public")
}
\keyword{IO}
XML/man/parseDTD.Rd 0000644 0001760 0000144 00000010174 12665242441 013463 0 ustar ripley users \name{parseDTD}
\alias{parseDTD}
\title{Read a Document Type Definition (DTD)}
\description{
Represents the contents of a DTD as a user-level object containing the element and entity definitions.
}
\usage{
parseDTD(extId, asText=FALSE, name="", isURL=FALSE, error = xmlErrorCumulator())
}
\arguments{
\item{extId}{The name of the file containing the DTD to be processed.}
\item{asText}{logical indicating whether the value of `extId' is the name of a file
or the DTD content itself. Use this when the DTD is read as a character vector, before being parsed
and handed to the parser as content only.}
\item{name}{Optional name to provide to the parsing mechanism.}
\item{isURL}{A logical value indicating whether the input source is to be
considered a URL or a regular file or string containing the XML.}
\item{error}{an R function that is called when an error is
encountered. This can report it and continue or terminate by raising
an error in R. See the error parameter for \code{\link{xmlTreeParse}}.}
}
\details{
Parses and converts the contents of the DTD in the specified file
into a user-level object containing all the information about the DTD.
}
\value{
A list with two entries, one for the entities and the other for the elements
defined within the DTD.
\item{entities}{a named list of the entities defined in the DTD.
Each entry is indexed by the name of the corresponding entity.
Each is an object of class \code{XMLEntity}
or alternatively \code{XMLExternalEntity} if the entity refers
to an external definition.
The fields of these types of objects are
\itemize{
\item{name}{the name of the entity by which users refer to it.}
\item{content}{the expanded value or definition of the entity}
\item{original}{the value of the entity, but with references to other entities not expanded, but
maintained in symbolic form.}
}
}
\item{elements}{a named list of the elements defined in the DTD, with the name of each element being
the identifier of the element being defined.
Each entry is an object of class \code{XMLElementDef}
which has 4 fields.
\itemize{
\item{name}{the name of the element.}
\item{type}{a named integer indicating the type of entry in the DTD, usually
either \code{element} or \code{mixed}.
The name of the value is a user-level type. The value is used for programming, both
internally and externally.
}
\item{contents}{a description of the elements that can be nested within this element.
This is an object of class \code{XMLElementContent}
or one of its specializations - \code{XMLSequenceContent}, \code{XMLOrContent}.
Each of these encodes the number of such elements permitted
(one, one or more, zero or one, or zero or more);
the type indicating whether the contents consist of a single element type,
an ordered sequence of elements, or one of a set of elements.
Finally, the actual contents description is described in the
\code{elements} field. This is a list of
one or more \code{XMLElementContent}, \code{XMLSequenceContent} and \code{XMLOrContent}
objects.
}
\item{attributes}{a named list of the attributes defined for this element in the DTD.
Each element is of class \code{XMLAttributeDef} which has 4 fields.
\itemize{
\item{name}{name of the attribute, i.e. the left hand side}
\item{type}{the type of the value, e.g. CDATA, Id, Idref(s), Entity(s), NMToken(s), Enumeration, Notation}
\item{defaultType}{the defined type, one of None, Implied, Fixed or Required.}
\item{defaultValue}{the default value if it is specified, or the enumerated values as a character vector, if the type is Enumeration.}
}
}
}
}
}
\references{ \url{http://www.w3.org} }
\author{Duncan Temple Lang }
\note{Needs libxml (currently version 1.8.7)}
\section{WARNING}{Errors in the DTD are stored as warnings for programmatic access.}
\seealso{\code{\link{xmlTreeParse}},
WritingXML.html in the distribution.}
\examples{
dtdFile <- system.file("exampleData", "foo.dtd",package="XML")
parseDTD(dtdFile)
txt <- readLines(dtdFile)
txt <- paste(txt, collapse="\n")
d <- parseDTD(txt, asText=TRUE)
\dontrun{
url <- "http://www.omegahat.net/XML/DTDs/DatasetByRecord.dtd"
d <- parseDTD(url, asText=FALSE)
}
}
\keyword{file}
\keyword{IO}
XML/man/xmlCleanNamespaces.Rd 0000644 0001760 0000144 00000002666 12157346344 015572 0 ustar ripley users \name{xmlCleanNamespaces}
\alias{xmlCleanNamespaces}
\title{Remove redundant namespaces on an XML document}
\description{
This is a convenience function that removes redundant
repeated namespace definitions in an XML node.
It removes namespace definitions in nodes
where an ancestor node also has that definition.
It does not remove unused namespace definitions.
This uses the \code{NSCLEAN} option for \code{\link{xmlParse}}.
}
\usage{
xmlCleanNamespaces(doc, options = integer(), out = docName(doc), ...)
}
\arguments{
\item{doc}{either the name of an XML document or the XML content
itself, or an already parsed document}
\item{options}{options for the XML parser. \code{NSCLEAN} is added to this.}
\item{\dots}{additional arguments passed to \code{\link{xmlParse}}}
\item{out}{the name of a file to which to write the resulting XML
document, or an empty character vector or logical value \code{FALSE}
to avoid writing the new document.
}
}
\value{
If the new document is written to a file, the name of the file is
returned.
Otherwise, the new parsed XML document is returned.
}
\references{
libxml2 documentation \url{http://xmlsoft.org/html/libxml-parser.html}
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{xmlParse}}
}
\examples{
f = system.file("exampleData", "redundantNS.xml", package = "XML")
doc = xmlParse(f)
print(doc)
newDoc = xmlCleanNamespaces(f, out = FALSE)
}
\keyword{programming}
\keyword{IO}
XML/man/xmlElementsByTagName.Rd 0000644 0001760 0000144 00000005532 12665242441 016044 0 ustar ripley users \name{xmlElementsByTagName}
\alias{xmlElementsByTagName}
\title{Retrieve the children of an XML node with a specific tag name}
\description{
This returns a list of the children or sub-elements of
an XML node whose tag name matches the one specified by
the user.
}
\usage{
xmlElementsByTagName(el, name, recursive = FALSE)
}
\arguments{
\item{el}{the node whose matching children are to be retrieved.}
\item{name}{a string giving the name of the tag to match in each of
\code{el}'s children.}
\item{recursive}{a logical value. If this is \code{FALSE}, the
default, only the direct child nodes are searched.
Alternatively, if this is \code{TRUE}, all sub-nodes
at all levels are searched. In other words,
we find all descendants of the node \code{el}
and return a list with the nodes having the given name.
The relationship between the nodes in the resulting list
cannot be determined. This is a set of nodes. See the note.
}
}
\details{
This does a simple matching of names and subsets the XML node's
children list.
If \code{recursive} is \code{TRUE}, then the function is applied
recursively to the children of the given node and so on.
}
\note{
The addition of the \code{recursive} argument makes this
function behave like the \code{getElementsByTagName}
in other language APIs such as Java, C\#.
However, one should be careful to understand that
in those languages, one would get back a set of
node objects. These nodes have references to their
parents and children. Therefore one can navigate the
tree from each node, find its relations, etc.
In the current version of this package (and for the foreseeable
future), the node set is a \dQuote{copy} of the
nodes in the original tree. And these have no facilities
for finding their siblings or parent.
Additionally, one can consume a large amount of memory by taking
a copy of numerous large nodes using this facility.
If one does not modify the nodes, the extra memory may be small. But
modifying them means that the contents will be copied.
Alternative implementations of the tree, e.g. using unique identifiers
for nodes or via internal data structures from libxml can allow us to
implement this function with different semantics, more similar to
the other APIs.
}
\value{
A list containing those child nodes of \code{el} whose
tag name matches that specified by the user.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.omegahat.net/RSXML}
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlChildren}}
\code{\link{xmlTreeParse}}
}
\examples{
\dontrun{
doc <- xmlTreeParse("http://www.omegahat.net/Scripts/Data/mtcars.xml")
xmlElementsByTagName(doc$children[[1]], "variable")
}
doc <- xmlTreeParse(system.file("exampleData", "mtcars.xml", package="XML"))
xmlElementsByTagName(xmlRoot(doc)[[1]], "variable")
}
\keyword{IO}
\keyword{file}
XML/man/getLineNumber.Rd 0000644 0001760 0000144 00000004127 12122117137 014545 0 ustar ripley users \name{getLineNumber}
\alias{getNodeLocation}
\alias{getNodePosition}
\alias{getLineNumber}
\title{Determine the location - file \& line number of an (internal) XML node}
\description{
The \code{getLineNumber} function is used to query the location of an internal/C-level
XML node within its original "file". This gives us the line number.
\code{getNodeLocation} gives both the line number and the name of the
file in which the node is located, handling XInclude files in a
top-level document and identifying the included file, as appropriate.
\code{getNodePosition} returns a simplified version of
\code{getNodeLocation},
combining the file and line number into a string and ignoring the
\code{XPointer} component.
This is useful when we identify a node with a particular characteristic
and want to view/edit the original document, e.g. when authoring a
DocBook article.
}
\usage{
getLineNumber(node, ...)
getNodeLocation(node, recursive = TRUE, fileOnly = FALSE)
}
\arguments{
\item{node}{the node whose location or line number is of interest}
\item{\dots}{additional parameters for methods should they be defined.}
\item{recursive}{a logical value that controls whether the
full path of the nested includes is returned or just the
path in the immediate XInclude element.}
\item{fileOnly}{a logical value which if \code{TRUE} means that only
the name of the file is returned, and not the \code{xpointer} attribute or
line number.}
}
\value{
\code{getLineNumber} returns an integer.
\code{getNodeLocation} returns a list with two elements -
\code{file} and \code{line} which are a character string
and the integer line number.
For text nodes, the line number is taken from the previous sibling
nodes or the parent node.
}
\references{libxml2 }
\author{Duncan Temple Lang}
\seealso{
\code{\link{findXInclude}}
\code{\link{xmlParse}}
\code{\link{getNodeSet}}
\code{\link{xpathApply}}
}
\examples{
f = system.file("exampleData", "xysize.svg", package = "XML")
doc = xmlParse(f)
e = getNodeSet(doc, "//ellipse")
sapply(e, getLineNumber)
}
\keyword{IO}
\concept{XML}
XML/man/xmlNode.Rd 0000644 0001760 0000144 00000010533 12665242441 013422 0 ustar ripley users \name{xmlNode}
\alias{xmlNode}
\alias{xmlTextNode}
\alias{xmlPINode}
\alias{xmlCDataNode}
\alias{xmlCommentNode}
\title{Create an XML node}
\description{
These functions allow one to create XML nodes
as are created in C code when reading XML documents.
Trees of XML nodes can be constructed and
integrated with other trees generated manually
or via the parser.
}
\usage{
xmlNode(name, ..., attrs=NULL, namespace="", namespaceDefinitions = NULL,
.children = list(...))
xmlTextNode(value, namespace="", entities = XMLEntities, cdata = FALSE)
xmlPINode(sys, value, namespace="")
xmlCDataNode(...)
xmlCommentNode(text)
}
\arguments{
\item{name}{The tag or element name of the XML node. This is what appears
in the elements as \code{<name> .. </name>}}
\item{\dots}{The children nodes of this XML node.
These can be objects of class \code{XMLNode}
or arbitrary values that will be converted to a string
to form an \code{XMLTextNode} object.}
\item{.children}{an alternative mechanism to specifying the children
which is useful for programmatic use when one has the children
in an existing list. The \dots mechanism is for use when the
children are specified directly and individually.
}
\item{attrs}{A named character vector giving the
name, value pairs of attributes for this XML node.}
\item{value}{This is the text that is to be used when forming
an \code{XMLTextNode}.}
\item{cdata}{a logical value which controls whether the text
being used for the child node is to be first
enclosed within a CDATA node to escape special characters such
as \code{>} and \code{&}.
}
\item{namespace}{The XML namespace identifier for this node.}
\item{namespaceDefinitions}{a collection of name space definitions, containing the prefixes and the corresponding URIs.
This is most conveniently specified as a character vector whose names attribute is the vector of prefixes
and whose values are the URIs. Alternatively, one can provide a list of name space definition objects
such as those returned by \code{\link{xmlNamespaceDefinitions}}.}
\item{sys}{the name of the system for which the processing instruction
is targeted. This is the value that appears in the
\code{<?sys value?>}}
\item{text}{character string giving the contents of the comment.}
\item{entities}{a character vector giving the mapping
from special characters to their entity equivalent.
This provides the character-expanded
entity pairings of 'character = entity', e.g. '<' = "lt" which are
used to make the content valid XML
so that it can be used within a text node.
The text is searched sequentially
for instances of each character in the names and each instance is
replaced with the corresponding '&entity;'.}
}
\value{
An object of class \code{XMLNode}.
In the case of \code{xmlTextNode},
this also inherits from \code{XMLTextNode}.
The fields or slots that objects
of these classes have
include
\code{name}, \code{attributes}, \code{children} and \code{namespace}.
However, one should use
the accessor functions
\code{\link{xmlName}},
\code{\link{xmlAttrs}},
\code{\link{xmlChildren}}
and
\code{\link{xmlNamespace}}
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{addChildren}}
\code{\link{xmlTreeParse}}
\code{\link{asXMLNode}}
\code{\link{newXMLNode}}
\code{\link{newXMLPINode}}
\code{\link{newXMLCDataNode}}
\code{\link{newXMLCommentNode}}
}
\examples{
# node named arg with two children: name and defaultValue
# Both of these have a text node as their child.
n <- xmlNode("arg", attrs = c(default="TRUE"),
xmlNode("name", "foo"), xmlNode("defaultValue","1:10"))
# internal C-level node.
a = newXMLNode("arg", attrs = c(default = "TRUE"),
newXMLNode("name", "foo"),
newXMLNode("defaultValue", "1:10"))
xmlAttrs(a) = c(a = 1, b = "a string")
xmlAttrs(a) = c(a = 1, b = "a string", append = FALSE)
newXMLNamespace(a, c("r" = "http://www.r-project.org"))
xmlAttrs(a) = c("r:class" = "character")
xmlAttrs(a[[1]]) = c("r:class" = "character")
# Using a character vector as a namespace definitions
x = xmlNode("bob",
namespaceDefinitions = c(r = "http://www.r-project.org",
omg = "http://www.omegahat.net"))
}
\keyword{file}
XML/man/schema-class.Rd 0000644 0001760 0000144 00000002152 12030420607 014341 0 ustar ripley users \name{schema-class}
\docType{class}
\alias{ExternalReference-class}
\alias{SchemaAttributeGroupTable-class}
\alias{SchemaAttributeTable-class}
\alias{SchemaElementTable-class}
\alias{SchemaNotationTable-class}
\alias{SchemaTypeTable-class}
\alias{libxmlTypeTable-class}
\alias{xmlSchemaAttributeGroupRef-class}
\alias{xmlSchemaAttributeRef-class}
\alias{xmlSchemaElementRef-class}
\alias{xmlSchemaNotationRef-class}
\alias{xmlSchemaRef-class}
\alias{xmlSchemaTypeRef-class}
\alias{names,xmlSchemaRef-method}
\alias{$,xmlSchemaRef-method}
\alias{names,libxmlTypeTable-method}
\alias{$,libxmlTypeTable-method}
\alias{$<-,libxmlTypeTable-method}
\alias{coerce,libxmlTypeTable,list-method}
\alias{show,XMLSchemaValidationResults-method}
\title{Classes for working with XML Schema}
\description{
These are classes used when working with XML schema
and using them to validate a document or querying the
schema for its elements.
The basic representation is an external/native object stored in the
\code{ref} slot.
}
%\section{Slots}{ \describe{}}
%\section{Methods}{}
\seealso{
\code{\link{xmlSchemaValidate}}
}
\keyword{classes}
XML/man/xmlElementSummary.Rd 0000644 0001760 0000144 00000002770 11741563530 015507 0 ustar ripley users \name{xmlElementSummary}
\alias{xmlElementSummary}
\title{Frequency table of names of elements and attributes in XML content}
\description{
This function is used to get an understanding of the use
of element and attribute names in an XML document.
It uses a collection of handler functions to gather
the information via a SAX-style parser.
The distribution of attribute names is done within each
"type" of element (i.e. element name)
}
\usage{
xmlElementSummary(url, handlers = xmlElementSummaryHandlers(url))
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{url}{the source of the XML content, e.g. a file, a URL, a
compressed file, or a character string}
\item{handlers}{the list of handler functions used to collect the
information. These are passed to the function
\code{\link{xmlEventParse}}
as the value for the \code{handlers} parameter.
}
}
\value{
A list with two elements
\item{nodeCounts}{a named vector of counts where the names are the
(XML namespace qualified) element names in the XML content}
\item{attributes}{a list with as many elements as there are elements
in the \code{nodeCounts} element of the result. Each element of this
sub-list gives the frequency counts for the different attributes seen within
the XML elements with that name.}
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlEventParse}}
}
\examples{
xmlElementSummary(system.file("exampleData", "eurofxref-hist.xml.gz", package = "XML"))
}
\keyword{IO}
\concept{XML}
XML/man/free.Rd 0000644 0001760 0000144 00000003054 11741563530 012734 0 ustar ripley users \name{free}
\alias{free}
\alias{free,XMLInternalDocument-method}
\title{Release the specified object and clean up its memory usage}
\description{
This generic function is available for explicitly releasing
the memory associated with the given object.
It is intended for use on external pointer objects
which do not have an automatic finalizer
function/routine that cleans up the memory that is used
by the native object.
This is the case, for example, for an XMLInternalDocument.
We cannot free it with a finalizer in all cases as we may have
a reference to a node in the associated document tree.
So the user must explicitly release the XMLInternalDocument object
to free the memory it occupies.
}
\usage{
free(obj)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{obj}{ the object whose memory is to be released,
typically an external pointer object or object
that contains a slot that is an external pointer. }
}
\details{
The methods will generally call a C routine to free the native
memory.
}
\value{
An updated version of the object with the external address set to NIL.
This is up to the individual methods.
}
\author{ Duncan Temple Lang}
\seealso{
\code{\link{xmlTreeParse}} with \code{useInternalNodes = TRUE}
}
\examples{
f = system.file("exampleData", "boxplot.svg", package = "XML")
doc = xmlParse(f)
nodes = getNodeSet(doc, "//path")
rm(nodes)
# free(doc)
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{IO}
\concept{external memory}
XML/man/removeXMLNamespaces.Rd 0000644 0001760 0000144 00000002166 12030203111 015646 0 ustar ripley users \name{removeXMLNamespaces}
%\Rdversion{1.1}
\alias{removeXMLNamespaces}
\alias{removeXMLNamespaces,XMLInternalNode-method}
\alias{removeXMLNamespaces,XMLInternalElementNode-method}
\alias{removeXMLNamespaces,XMLInternalDocument-method}
\title{Remove namespace definitions from a XML node or document}
\description{
This function and its methods allow one to remove one
or more XML namespace definitions on XML nodes within a document.
}
\usage{
removeXMLNamespaces(node, ..., all = FALSE, .els = unlist(list(...)))
}
\arguments{
\item{node}{an XMLInternalNode or XMLInternalDocument object}
\item{\dots}{the names of the namespaces to remove or an
XMLNamespaceRef object returned via \code{\link{getNodeSet}} or \code{\link{xpathApply}}.}
\item{all}{a logical value indicating whether to remove all the
namespace definitions on a node.}
\item{.els}{a list which is sometimes a convenient way to specify the
namespaces to remove.}
}
\value{
This function is used for its side-effects and changing the internal node.}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{newXMLNamespace}}
}
%\examples{}
\keyword{IO}
XML/man/xmlStopParser.Rd 0000644 0001760 0000144 00000004461 11741563530 014641 0 ustar ripley users \name{xmlStopParser}
\alias{xmlStopParser}
\title{Terminate an XML parser}
\description{
This function allows an R-level function to terminate an
XML parser before it completes the processing of the XML content.
This might be useful, for example, in event-driven parsing
with \code{\link{xmlEventParse}} when we want
to read through an XML file until we find a record of interest.
Then, having retrieved the necessary information, we want to
terminate the parsing rather than let it pointlessly continue.
Instead of raising an error in our handler function, we can call
\code{xmlStopParser} and return. The parser will then take control
again and terminate and return back to the original R function from
which it was invoked.
The only argument to this function is a reference to an internal C-level
object which identifies the parser. This is passed by the R-XML parser
mechanism to a function invoked by the parser if that function
inherits (in the S3 sense) from the class \code{XMLParserContextFunction}.
}
\usage{
xmlStopParser(parser)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{parser}{ an object of class \code{XMLParserContext}
which must have been obtained via an
\code{XMLParserContextFunction} function
called by the parser. This is just a handler function whose class
includes \code{XMLParserContextFunction}
}
}
\value{
\code{TRUE} if it succeeded and an error is raised
if the \code{parser} object is not valid.
}
\references{libxml2 \url{http://xmlsoft.org}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlEventParse}}
}
\examples{
############################################
# Stopping the parser mid-way and an example of using XMLParserContextFunction.
startElement =
function(ctxt, name, attrs, ...) {
print(ctxt)
print(name)
if(name == "rewriteURI") {
cat("Terminating parser\n")
xmlStopParser(ctxt)
}
}
class(startElement) = "XMLParserContextFunction"
endElement =
function(name, ...)
cat("ending", name, "\n")
fileName = system.file("exampleData", "catalog.xml", package = "XML")
xmlEventParse(fileName, handlers = list(startElement = startElement, endElement = endElement))
}
\keyword{IO}
\keyword{programming}
\concept{Error handling}
\concept{streaming data}
XML/man/newXMLNamespace.Rd 0000644 0001760 0000144 00000003271 12030203162 014763 0 ustar ripley users \name{newXMLNamespace}
\alias{newXMLNamespace}
\title{Add a namespace definition to an XML node}
\description{
This function, and associated methods,
define a name space \code{prefix = URI} combination for the
given XML node.
It can also optionally make this name space the
default namespace for the node.
}
\usage{
newXMLNamespace(node, namespace, prefix = names(namespace), set = FALSE)
}
\arguments{
\item{node}{the XML node for which the name space is to be defined.}
\item{namespace}{the namespace(s).
This can be a simple character vector giving the URI,
a named character vector giving the prefix = URI pairs, with the prefixes being the names
of the character vector,
or one or more (a list) of \code{XMLNamespace} objects, e.g. returned from a call to
\code{\link{xmlNamespaceDefinitions}}}
\item{prefix}{
the prefixes to be associated with the URIs given in \code{namespace}.
}
\item{set}{ a logical value indicating whether to
set the namespace for this node to this newly created name space definition.
}
}
\value{
A name space definition object whose class corresponds
to the type of XML node given in \code{node}.
}
\note{
Currently, this only applies to XMLInternalNodes. This will be rectified shortly
and apply to RXMLNode and its non-abstract classes.
}
\references{ ~put references to the literature/web site here ~ }
\author{Duncan Temple Lang}
\seealso{
Constructors for different XML node types - \code{newXMLNode}
\code{xmlNode}.
\code{\link{newXMLNamespace}}.
}
\examples{
foo = newXMLNode("foo")
ns = newXMLNamespace(foo, "http://www.r-project.org", "r")
as(ns, "character")
}
\keyword{IO}
\keyword{programming}
\concept{XML}
XML/man/docName.Rd 0000644 0001760 0000144 00000003121 11741563530 013354 0 ustar ripley users \name{docName}
\alias{docName}
\alias{docName,XMLDocument-method}
\alias{docName,XMLDocumentContent-method}
\alias{docName,XMLHashTree-method}
\alias{docName,XMLInternalDocument-method}
\alias{docName,XMLInternalNode-method}
\alias{docName,XMLHashTreeNode-method}
\alias{docName,NULL-method}
\alias{docName,XMLNode-method}
\alias{docName<-}
\alias{docName<-,XMLInternalDocument-method}
\alias{docName<-,XMLHashTree-method}
\title{Accessors for name of XML document}
\description{
These functions and methods allow us to query and set the
\dQuote{name} of an XML document. This is intended to be
its URL or file name or a description of its origin if
raw XML content was provided as a string.
}
\usage{
docName(doc, ...)
}
\arguments{
\item{doc}{the XML document object, of class
\code{XMLInternalDocument} or \code{XMLDocument}.
}
\item{\dots}{additional arguments for methods}
}
\value{
A character string giving the name.
If the document was created from text, this is \code{NA}
(of class character).
The assignment function returns the updated object,
but the R assignment operation will return the value
on the right of the assignment!
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{xmlInternalTreeParse}}
\code{\link{newXMLDoc}}
}
\examples{
f = system.file("exampleData", "catalog.xml", package = "XML")
doc = xmlInternalTreeParse(f)
docName(doc)
doc = xmlInternalTreeParse("", asText = TRUE)
# an NA
docName(doc)
docName(doc) = "Simple XML example"
docName(doc)
}
\keyword{IO}
\keyword{programming}
\concept{XML}
XML/man/xmlStructuredStop.Rd 0000644 0001760 0000144 00000006573 11741563530 015557 0 ustar ripley users \name{xmlStructuredStop}
\alias{xmlStructuredStop}
\alias{xmlErrorCumulator}
\title{Condition/error handler functions for XML parsing}
\description{
These functions provide basic error handling for the XML parser in
R. They also illustrate the basics which will allow others to
provide customized error handlers that make more use of the
information provided in each error reported.
The \code{xmlStructuredStop} function provides a simple R-level handler for errors
raised by the XML parser.
It collects the information provided by the XML parser and
raises an R error.
This is only used if \code{NULL} is specified for the
\code{error} argument of \code{\link{xmlTreeParse}},
\code{\link{xmlInternalTreeParse}} and \code{\link{htmlTreeParse}}.
The default is to use the function returned by a call to
\code{xmlErrorCumulator} as the error handler.
This, as the name suggests, cumulates errors.
The idea is to catch each error and let the parser continue
and then report them all.
As each error is encountered, it is collected by the function.
If \code{immediate} is \code{TRUE}, the error is also reported on
the console.
When the parsing is complete and has failed, this function is
invoked again with a zero-length character vector as the
message (first argument) and then it raises an error.
This function will then raise an R condition of class \code{class}.
}
\usage{
xmlStructuredStop(msg, code, domain, line, col, level, filename,
class = "XMLError")
xmlErrorCumulator(class = "XMLParserErrorList", immediate = TRUE)
}
\arguments{
\item{msg}{character string, the text of the message being reported}
\item{code}{ an integer code giving an identifier for the error (see
xmlerror.h) for the moment,}
\item{domain}{ an integer domain indicating in which "module" or part of the
parsing the error occurred, e.g. name space, parser, tree, xinclude, etc.}
\item{line}{ an integer giving the line number in the XML content
being processed corresponding to the error, }
\item{col}{ an integer giving the column position of the error, }
\item{level}{ an integer giving the severity of the error ranging from 1 to 3 in
increasing severity (warning, error, fatal),}
\item{filename}{character string, the name of the document being processed, i.e. its file name or
URL.}
\item{class}{ character vector, any classes to prepend to the class
attribute to make the error/condition. These are prepended to those
returned via \code{\link[base]{simpleError}}.}
\item{immediate}{logical value, if \code{TRUE} errors are
displayed on the R console as they are encountered. Otherwise, the
errors are collected and displayed at the end of the XML parsing.}
}
\value{
This calls \code{\link[base]{stop}} and so does not return a value.
}
\references{libxml2 and its error handling facilities (\url{http://xmlsoft.org})}
\author{ Duncan Temple Lang}
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{xmlInternalTreeParse}}
\code{\link{htmlTreeParse}}
}
\examples{
tryCatch( xmlTreeParse("", asText = TRUE, error = NULL),
XMLError = function(e) {
cat("There was an error in the XML at line",
e$line, "column", e$col, "\n",
e$message, "\n")
})
}
\keyword{IO }
\keyword{programming}
\concept{error handling}
XML/man/names.XMLNode.Rd 0000644 0001760 0000144 00000002261 12665242441 014363 0 ustar ripley users \name{names.XMLNode}
\alias{names.XMLNode}
\title{Get the names of an XML node's children.}
\description{
This is a convenient way to obtain the XML tag name
of each of the sub-nodes of a given
\code{XMLNode} object.
}
\usage{
\method{names}{XMLNode}(x)
}
\arguments{
\item{x}{the \code{XMLNode} whose sub-node tag names are being
queried.}
}
\value{
A character vector returning the
tag names of the sub-nodes of the given
\code{XMLNode} argument.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\note{ This overrides the regular names method
which would display the names of the internal
fields of an \code{XMLNode} object.
Since these are intended to be invisible and
queried via the accessor methods (\code{\link{xmlName}},
\code{\link{xmlAttrs}}, etc.), this should not
be a problem. If you really need the names
of the fields, use \code{names(unclass(x))}.
}
\seealso{
\code{\link{xmlApply}}
\code{\link{xmlSApply}}
}
\examples{
doc <- xmlTreeParse(system.file("exampleData", "mtcars.xml", package="XML"))
names(xmlRoot(doc))
r <- xmlRoot(doc)
r[names(r) == "variables"]
}
\keyword{file}
XML/man/xmlNamespaceDefinitions.Rd 0000644 0001760 0000144 00000010065 12030576631 016622 0 ustar ripley users \name{xmlNamespaceDefinitions}
\alias{xmlNamespaceDefinitions}
\alias{getDefaultNamespace}
\alias{xmlNamespaces}
\alias{xmlNamespaces<-}
\alias{xmlNamespaces<-,XMLInternalNode-method}
\alias{xmlNamespaces<-,XMLNode-method}
\alias{coerce,NULL,XMLNamespaceDefinitions-method}
\alias{coerce,XMLNamespace,character-method}
\alias{coerce,XMLNamespaceDefinition,character-method}
\alias{coerce,XMLNamespaceDefinitions,character-method}
\alias{coerce,character,XMLNamespaceDefinitions-method}
\title{Get definitions of any namespaces defined in this XML node}
\description{
If the given node has any namespace definitions declared within it,
i.e. of the form \code{xmlns:myNamespace="http://www.myNS.org"},
\code{xmlNamespaceDefinitions} provides access to these definitions.
While they appear in the XML node in the document as attributes,
they are treated differently by the parser and so do not show up
in the node's attributes via \code{\link{xmlAttrs}}.
\code{getDefaultNamespace} is used to get the default namespace
for the top-level node in a document.
The \code{recursive} parameter allows one to conveniently find all the namespace
definitions in a document or sub-tree without having to examine the file.
This can be useful when working with XPath queries via
\code{\link{getNodeSet}}.
}
\usage{
xmlNamespaceDefinitions(x, addNames = TRUE, recursive = FALSE, simplify = FALSE, ...)
xmlNamespaces(x, addNames = TRUE, recursive = FALSE, simplify = FALSE, ...)
getDefaultNamespace(doc, ns = xmlNamespaceDefinitions(doc, simplify = simplify),
simplify = FALSE)
}
\arguments{
\item{x}{the \code{XMLNode} object in which to find any namespace
definitions}
\item{addNames}{a logical indicating whether to compute the names for
the elements in the resulting list. The names are convenient, but
one can avoid the (very small) overhead of computing these with this
parameter.}
\item{doc}{the XMLInternalDocument object obtained from a call to
\code{\link{xmlParse}}
}
\item{recursive}{a logical value indicating whether to extract the
namespace definitions for just this node (\code{FALSE})
or all of the descendant nodes as well (\code{TRUE}).
If this is \code{TRUE}, all the namespace definitions are
collected into a single "flat" list and so there may be duplicate
names.
}
\item{simplify}{a logical value. If this is \code{TRUE},
a character vector of prefix-URI pairs is returned.
This can be used directly in calls to functions such as
\code{\link{xpathApply}} and \code{\link{getNodeSet}}.
The default value of \code{FALSE} returns a list
of name space definitions which also identify
whether the definition is local to the particular node or inherited
from an ancestor.
}
\item{ns}{the collection of namespaces. This is typically omitted
but can be specified if it has been computed in an earlier step.}
\item{\dots}{additional parameters for methods}
}
\value{
A list with as many elements as there are namespace definitions.
Each element is an object of class XMLNameSpace,
containing fields giving the local identifier, the associated defining
URI and a logical value indicating whether the definition is local to
this node.
The name of each element is the prefix or alias used for that
namespace definition, i.e. the value of the \code{id} field in the
namespace definition. For default namespaces, i.e. those that have no
prefix/alias, the name is \code{""}.
}
\references{\url{http://www.w3.org/XML}}
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{xmlAttrs}}
\code{\link{xmlGetAttr}}
}
\examples{
f = system.file("exampleData", "longitudinalData.xml", package = "XML")
n = xmlRoot(xmlTreeParse(f))
xmlNamespaceDefinitions(n)
xmlNamespaceDefinitions(n, recursive = TRUE)
# Now using internal nodes.
f = system.file("exampleData", "namespaces.xml", package = "XML")
doc = xmlInternalTreeParse(f)
n = xmlRoot(doc)
xmlNamespaceDefinitions(n)
xmlNamespaceDefinitions(n, recursive = TRUE)
}
\keyword{IO}
\concept{XML}
XML/man/getEncoding.Rd 0000644 0001760 0000144 00000002001 11741563530 014230 0 ustar ripley users \name{getEncoding}
%\Rdversion{1.1}
\alias{getEncoding}
\alias{getEncoding,XMLInternalDocument-method}
\alias{getEncoding,XMLInternalNode-method}
\alias{getEncoding,ANY-method}
\title{Determines the encoding for an XML document or node}
\description{
This function and its methods are intended to return the
encoding of an XML document or node.
It is similar to \code{\link[base]{Encoding}} but currently
restricted to XML nodes and documents.
}
\usage{
getEncoding(obj, ...)
}
\arguments{
\item{obj}{the object whose encoding is being queried.}
\item{\dots}{any additional parameters which can be customized by the methods.}
}
\value{
A character vector of length 1 giving the encoding of the XML document.
}
\author{ Duncan Temple Lang}
\examples{
f = system.file("exampleData", "charts.svg", package = "XML")
doc = xmlParse(f)
getEncoding(doc)
n = getNodeSet(doc, "//g/text")[[1]]
getEncoding(n)
f = system.file("exampleData", "iTunes.plist", package = "XML")
doc = xmlParse(f)
getEncoding(doc)
}
\keyword{IO}
XML/man/replaceNodeWithChildren.Rd 0000644 0001760 0000144 00000001701 13610013626 016527 0 ustar ripley users \name{replaceNodeWithChildren}
\alias{replaceNodeWithChildren}
\title{Replace an XML node with its child nodes}
\description{
This function can be used to flatten parts of an XML tree.
This takes a node and removes it from the tree, but places
its children in its place.
}
\usage{
replaceNodeWithChildren(node)
}
\arguments{
\item{node}{an \code{XMLInternalNode} object}
}
\value{
\code{NULL}. The purpose of this function is to modify the internal document.
}
\references{
libxml2 documentation.
}
\author{
Duncan Temple Lang
}
\examples{
doc = xmlParse('<doc>
<page>
<p>A</p>
<p>B</p>
<p>C</p>
</page>
<page>
<p>D</p>
<p>E</p>
<p>F</p>
</page>
</doc>')
pages = getNodeSet(doc, "//page")
invisible(lapply(pages, replaceNodeWithChildren))
doc
}
\keyword{IO}
XML/man/xmlAttributeType.Rd 0000644 0001760 0000144 00000002024 12665242441 015336 0 ustar ripley users \name{xmlAttributeType}
\alias{xmlAttributeType}
\title{The type of an XML attribute for an element from the DTD}
\description{
This examines the definition of the
attribute, usually returned by parsing the DTD with
\code{\link{parseDTD}} and determines
its type from the possible values:
Fixed, string data, implied,
required, an identifier,
an identifier reference, a list of identifier references,
an entity, a list of entities,
a name, a list of names, an element of enumerated set,
a notation entity.
}
\usage{
xmlAttributeType(def, defaultType=FALSE)
}
\arguments{
\item{def}{the attribute definition object, usually retrieved from
the DTD via \code{\link{parseDTD}}.}
\item{defaultType}{whether to return the default value
if this attribute is defined as being a value from an enumerated set.}
}
\value{
A string identifying the type for the specified attribute.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.omegahat.net/RSXML}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{parseDTD}}
}
\keyword{file}
XML/man/parseXMLAndAdd.Rd 0000644 0001760 0000144 00000006412 12665242441 014544 0 ustar ripley users \name{parseXMLAndAdd}
\alias{parseXMLAndAdd}
\title{Parse XML content and add it to a node}
\description{
This function parses the given XML content as a string
by putting it inside a top-level node and then returns
the document or adds the children to the specified parent.
The motivation for this function is when we can use
string manipulation to efficiently create the XML content
by using vectorized operations in R, but then
converting that content into parsed nodes.
Generating XML/HTML content by glueing strings together
is a poor approach. It is often convenient, but rarely
good general software design. It makes for bad software that is not
very extensible and difficult to maintain and enhance.
Structure that is
programmatically accessible is much better. The tree
approach provides this structure.
Using strings is convenient and somewhat appropriate when done
atomically for large amounts of highly regular content.
But then the results should be converted to the structured tree
so that they can be modified and extended.
This function facilitates using strings and returning structured content.
}
\usage{
parseXMLAndAdd(txt, parent = NULL, top = "tmp", nsDefs = character())
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{txt}{the XML content to parse}
\item{parent}{an XMLInternalNode to which the top-level nodes in
\code{txt} will be added as children}
\item{top}{the name for the top-level node. If \code{parent} is
specified, this is used but irrelevant.}
\item{nsDefs}{a character vector of name = value pairs giving
namespace definitions to be added to the top node.}
}
\value{
If \code{parent} is \code{NULL}, the root node of the
parsed document is returned. This will be an element
whose name is given by \code{top} unless the XML content in \code{txt}
is AsIs or \code{top} is empty.
If \code{parent} is non-\code{NULL}, the top-level nodes parsed from
\code{txt} are added to \code{parent} as children.
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{newXMLNode}}
\code{\link{xmlParse}}
\code{\link{addChildren}}
}
\examples{
long = runif(10000, -122, -80)
lat = runif(10000, 25, 48)
txt = sprintf("<Placemark><Point><coordinates>\%.3f,\%.3f,0</coordinates></Point></Placemark>",
long, lat)
f = newXMLNode("Folder")
parseXMLAndAdd(txt, f)
xmlSize(f)
\dontrun{
# this version is much slower as i) we don't vectorize the
# creation of the XML nodes, and ii) the parsing of the XML
# as a string is very fast as it is done in C.
f = newXMLNode("Folder")
mapply(function(a, b) {
newXMLNode("Placemark",
newXMLNode("Point",
newXMLNode("coordinates",
paste(a, b, "0", collapse = ","))),
parent = f)
},
long, lat)
xmlSize(f)
o = c("dog", "cat")
node = parseXMLAndAdd(o, nsDefs = c("http://cran.r-project.org",
omg = "http://www.omegahat.net"))
xmlNamespace(node[[1]])
xmlNamespace(node[[2]])
tt = newXMLNode("myTop")
node = parseXMLAndAdd(o, tt, nsDefs = c("http://cran.r-project.org",
omg = "http://www.omegahat.net"))
tt
}
}
\keyword{IO}
XML/man/dtdIsAttribute.Rd 0000644 0001760 0000144 00000002502 12665242441 014744 0 ustar ripley users \name{dtdIsAttribute}
\alias{dtdIsAttribute}
\title{Query if a name is a valid attribute of a DTD element.}
\description{
Examines the definition of the DTD element definition identified
by \code{element} to see if it supports an attribute named
\code{name}.
}
\usage{
dtdIsAttribute(name, element, dtd)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{name}{The name of the attribute being queried}
\item{element}{The name of the element whose definition is to be used
to obtain the list of valid attributes.}
\item{dtd}{The DTD containing the definition of the elements,
specifically \code{element}.}
}
\value{
A logical value indicating if the
list of attributes supported by the
specified element has an entry named
\code{name}.
This does not indicate what type of value
that attribute has, whether it is required, implied,
fixed, etc.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{parseDTD}},
\code{\link{dtdElement}},
\code{\link{xmlAttrs}}
}
\examples{
dtdFile <- system.file("exampleData", "foo.dtd", package="XML")
foo.dtd <- parseDTD(dtdFile)
# true
dtdIsAttribute("numRecords", "dataset", foo.dtd)
# false
dtdIsAttribute("date", "dataset", foo.dtd)
}
\keyword{file}
XML/man/dtdValidElement.Rd 0000644 0001760 0000144 00000003466 12665242441 015070 0 ustar ripley users \name{dtdValidElement}
\alias{dtdValidElement}
\title{Determines whether an XML tag is valid within another.}
\description{
This tests whether \code{name} is a legitimate tag
to use as a direct sub-element of the \code{within} tag
according to the definition of the \code{within}
element in the specified DTD.
}
\usage{
dtdValidElement(name, within, dtd, pos=NULL)
}
\arguments{
\item{name}{The name of the tag which is to be inserted inside the
\code{within} tag.}
\item{within}{The name of the parent tag the definition of which we are checking
to determine if it contains \code{name}.}
\item{dtd}{The DTD in which the elements \code{name} and \code{within} are defined. }
\item{pos}{ An optional position at which we might add the
\code{name} element inside \code{within}. If this is specified, we have a stricter
test that accounts for sequences in which elements must appear in order.
These are comma-separated entries in the element definition.}
}
\details{
This applies to direct sub-elements
or children of the \code{within} tag and not tags nested
within children of that tag, i.e. descendants.
}
\value{
Returns a logical value.
TRUE indicates that a \code{name} element
can be used inside a \code{within} element.
FALSE indicates that it cannot.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{parseDTD}},
\code{\link{dtdElement}},
\code{\link{dtdElementValidEntry}}
}
\examples{
dtdFile <- system.file("exampleData", "foo.dtd", package="XML")
foo.dtd <- parseDTD(dtdFile)
# The following are true.
dtdValidElement("variable","variables", dtd = foo.dtd)
dtdValidElement("record","dataset", dtd = foo.dtd)
# This is false.
dtdValidElement("variable","dataset", dtd = foo.dtd)
}
\keyword{file}
XML/man/readKeyValueDB.Rd 0000644 0001760 0000144 00000003244 12122116453 014573 0 ustar ripley users \name{readKeyValueDB}
\alias{readKeyValueDB}
\alias{readKeyValueDB,character-method}
\alias{readKeyValueDB,XMLInternalDocument-method}
\alias{readKeyValueDB,XMLInternalNode-method}
\alias{readKeyValueDB,AsIs-method}
\title{Read an XML property-list style document}
\description{
This function and its methods reads an XML document
that is in the format of name-value or key-value
pairs made up of a \code{plist} and
\code{dict} nodes, each of which is made up of \code{key} and value node
pairs. These used to be used for property lists on OS X and
can represent arbitrary data relatively conveniently.
}
\usage{
readKeyValueDB(doc, ...)
}
\arguments{
\item{doc}{the object containing the data. This can be the name of a
file, a parsed XML document or an XML node.}
\item{\dots}{additional parameters for the methods.
One can pass \code{dropComments} as a logical value to control
whether comment nodes are processed or ignored (\code{TRUE}).
}
}
\value{
An R object representing the data read from the XML content.
This is typically a named list or vector where the names are the keys
and the values are collected into an R "container".
}
\references{
Property lists.
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{readSolrDoc}},
\code{\link{xmlToList}},
\code{\link{xmlToDataFrame}},
\code{\link{xmlParse}}
}
\examples{
if(file.exists("/usr/share/hiutil/Stopwords.plist")) {
o = readKeyValueDB("/usr/share/hiutil/Stopwords.plist")
}
if(file.exists("/usr/share/java/Tools/Applet Launcher.app/Contents/Info.plist"))
javaInfo = readKeyValueDB('/usr/share/java/Tools/Applet Launcher.app/Contents/Info.plist')
}
\keyword{IO}
\concept{XML}
XML/man/readHTMLTable.Rd 0000644 0001760 0000144 00000017274 13427010675 014374 0 ustar ripley users \name{readHTMLTable}
\alias{readHTMLTable}
\alias{readHTMLTable,character-method}
\alias{readHTMLTable,HTMLInternalDocument-method}
\alias{readHTMLTable,XMLInternalElementNode-method}
\alias{coerce,character,FormattedInteger-method}
\alias{coerce,character,FormattedNumber-method}
\alias{coerce,character,Percent-method}
\alias{coerce,character,Currency-method}
\alias{FormattedInteger-class}
\alias{FormattedNumber-class}
\alias{Percent-class}
\title{Read data from one or more HTML tables}
\description{
This function and its methods provide somewhat robust methods for
extracting data from HTML tables in an HTML document.
One can read all the tables in a document given by filename or (\code{http:}
or \code{ftp:}) URL,
or having already parsed the document via \code{\link{htmlParse}}.
Alternatively, one can specify an individual \code{<table>}
node in the document.
The methods attempt to do some heuristic computations to determine
the header labels for the columns, the name of the table, etc.
}
% xmlName(node) == "table" && ("thead" \%in\% names(node) || length(getNodeSet(node, "./tr[1]/th")) > 0)
\usage{
readHTMLTable(doc, header = NA,
colClasses = NULL, skip.rows = integer(), trim = TRUE,
elFun = xmlValue, as.data.frame = TRUE, which = integer(),
...)
}
\arguments{
\item{doc}{the HTML document which can be a file name or a URL
or an already parsed \code{HTMLInternalDocument}, or
an HTML node of class \code{XMLInternalElementNode},
or a character vector containing the HTML content to parse and process.}
\item{header}{either a logical value indicating whether the table has
column labels, e.g. the first row or a \code{thead}, or alternatively
a character vector giving the names to use for the resulting columns.
This can be a logical vector and the individual values will be used
in turn for the different tables. This allows the caller to control
whether individual tables are processed as having column names.
Alternatively, one can read a specific table via the \code{which}
parameter and control how that is processed with a single scalar logical.
}
\item{colClasses}{either a list or a vector that gives the names of
the data types for the different columns in the table, or
alternatively a function used to convert the string values to the
appropriate type. A value of \code{NULL} means that we should drop
that column from the result.
Note that currently the conversion occurs before the
vectors are converted to a data frame (if \code{as.data.frame} is
\code{TRUE}).
As a result, to ensure that character vectors remain as characters
and not factors, use \code{stringsAsFactors = FALSE}.
This typically applies only to an individual table and so for the
method applied to a \code{XMLInternalElementNode} object.
In addition to the usual "integer", "numeric", "logical", "character", etc.
names of R data types, one can use
"FormattedInteger", "FormattedNumber" and "Percent" to specify that
the values are numbers possibly formatted with commas (,) separating
groups of digits, or a number followed by a percent sign (\%).
This mechanism allows one to introduce new classes and specify these
as targets in \code{colClasses}.
}
\item{skip.rows}{an integer vector indicating which rows to ignore.}
\item{trim}{a logical value indicating whether to remove leading and
trailing white space from the content cells.}
\item{elFun}{a function which, if specified, is called when converting
each cell. Currently, only the node is specified. In the future, we might
additionally pass the index of the column so that the function has
some context, e.g. whether the value is a row label or a regular
value, or if the caller knows the type of columns.
}
\item{as.data.frame}{a logical value indicating whether to turn the
resulting table(s) into data frames or leave them as matrices.
}
\item{which}{an integer vector identifying which tables to return from
within the document. This applies to the method for the document, not
individual tables.}
\item{\dots}{currently additional parameters that are passed on to
\code{\link{as.data.frame}} if \code{as.data.frame} is \code{TRUE}.
We may change this to use these as additional arguments for calls to
\code{elFun}.}
}
\value{
If the document (either by name or parsed tree) is specified,
the return value is a list of data frames or matrices.
If a single HTML node is provided, the result is a single data frame
or matrix for that table.
}
\references{
HTML4.0 specification
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{htmlParse}}
\code{\link{getNodeSet}}
\code{\link{xpathSApply}}
}
\examples{
\dontrun{
## This changed to using https: in June 2015, and that is unsupported.
# u = "http://en.wikipedia.org/wiki/World_population"
u = "https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population"
tables = readHTMLTable(u)
names(tables)
tables[[2]]
# Print the table. Note that the values are all characters
# not numbers. Also the column names have a preceding X since
# R doesn't allow the variable names to start with digits.
tmp = tables[[2]]
# Let's just read the second table directly by itself.
doc = htmlParse(u)
tableNodes = getNodeSet(doc, "//table")
tb = readHTMLTable(tableNodes[[2]])
# Let's try to adapt the values on the fly.
# We'll create a function that turns a th/td node into a val
tryAsInteger = function(node) {
val = xmlValue(node)
ans = as.integer(gsub(",", "", val))
if(is.na(ans))
val
else
ans
}
tb = readHTMLTable(tableNodes[[2]], elFun = tryAsInteger)
tb = readHTMLTable(tableNodes[[2]], elFun = tryAsInteger,
colClasses = c("character", rep("integer", 9)))
}
zz =
readHTMLTable("https://www.inflationdata.com/Inflation/Consumer_Price_Index/HistoricalCPI.aspx")
if(any(i <- sapply(zz, function(x) if(is.null(x)) 0 else ncol(x)) == 14)) {
# guard against the structure of the page changing.
zz = zz[[which(i)[1]]] # 4th table
# convert columns to numeric. Could use colClasses in the call to readHTMLTable()
zz[-1] = lapply(zz[-1], function(x) as.numeric(gsub(".* ", "", as.character(x))))
matplot(1:12, t(zz[-c(1, 14)]), type = "l")
}
# From Marsh Feldman on R-help, possibly
# https://stat.ethz.ch/pipermail/r-help/2010-March/232586.html
# That site was non-responsive in June 2015,
# and this does not do a good job on the current table.
\donttest{
doc <- "http://www.nber.org/cycles/cyclesmain.html"
# The main table is the second one because it's embedded in the page table.
tables <- getNodeSet(htmlParse(doc), "//table")
xt <- readHTMLTable(tables[[2]],
header = c("peak","trough","contraction",
"expansion","trough2trough","peak2peak"),
colClasses = c("character","character","character",
"character","character","character"),
trim = TRUE, stringsAsFactors = FALSE
)
}
if(FALSE) {
# Here is a totally different way of reading tables from HTML documents.
# The data are formatted using PRE and so can be read via read.table
u = "http://tidesonline.nos.noaa.gov/data_read.shtml?station_info=9414290+San+Francisco,+CA"
h = htmlParse(u)
p = getNodeSet(h, "//pre")
con = textConnection(xmlValue(p[[2]]))
tides = read.table(con)
}
\dontrun{
## This is not accessible without authentication ...
u = "http://www.omegahat.net/RCurl/testPassword/table.html"
if(require(RCurl) && url.exists(u)) {
tt = getURL(u, userpwd = "bob:duncantl")
readHTMLTable(tt)
}}
}
\keyword{IO}
\keyword{data}
XML/man/makeClassTemplate.Rd 0000644 0001760 0000144 00000004663 13607575000 015417 0 ustar ripley users \name{makeClassTemplate}
\alias{makeClassTemplate}
\title{Create S4 class definition based on XML node(s)}
\description{
This function is used to create an S4 class definition
by examining an XML node and mapping the sub-elements
to S4 classes. This works very simply with child nodes
being mapped to other S4 classes that are defined recursively in the
same manner. Simple text elements are mapped to a generic character string.
Types can be mapped to more specific types (e.g. boolean, Date, integer)
by the caller (via the \code{types}) parameter.
The function also generates a coercion method from an
\code{XMLAbstractNode} to an instance of this new class.
This function can either return the code that defines the class
or it can define the new class in the R session.
}
\usage{
makeClassTemplate(xnode, types = character(), default = "ANY",
className = xmlName(xnode), where = globalenv())
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{xnode}{the XML node to analyze}
\item{types}{a character vector mapping XML elements to R classes}
\item{default}{the default class to map an element to}
\item{className}{the name of the new top-level class to be defined.
This is the name of the XML node (without the name space)}
\item{where}{ typically either an environment or NULL.
This is used to control where the class and coercion method are
defined
or if \code{NULL} inhibits the code being evaluated. In this
case, the code is returned as strings.}
}
%\details{}
\value{
A list with 4 elements:
\item{name}{the name of the new class}
\item{slots}{a character vector giving the slot name and type name pairs}
\item{def}{code for defining the class}
\item{coerce}{code for defining the coercion method from an
XMLAbstractNode to an instance of the new class}
If \code{where} is not \code{NULL}, the class and coercion code
is actually evaluated and the class and method will be defined
in the R session as a side effect.
}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlToS4}}
}
\examples{
txt = paste0("<doc><part><name>ABC</name><type>XYZ</type>",
"<cost>3.54</cost><status>available</status></part></doc>")
doc = xmlParse(txt)
code = makeClassTemplate(xmlRoot(doc)[[1]], types = c(cost = "numeric"))
as(xmlRoot(doc)[["part"]], "part")
}
\keyword{programming}
\concept{reflection}
\concept{meta-programming}
XML/man/asXMLNode.Rd 0000644 0001760 0000144 00000002144 12665242441 013605 0 ustar ripley users \name{asXMLNode}
\alias{asXMLNode}
\alias{coerce,XMLInternalNode,XMLNode-method}
\title{Converts non-XML node objects to XMLTextNode objects}
\description{
This function is used to convert S objects that
are not already \code{XMLNode} objects
into objects of that class. Specifically,
it treats the object as a string and creates
an \code{XMLTextNode} object.
Also, there is a method for converting an XMLInternalNode
- the C-level libxml representation of a node - to
an explicit R-only object which contains the R values
of the data in the internal node.
}
\usage{
asXMLNode(x)
}
\arguments{
\item{x}{the object to be converted to an \code{XMLNode} object.
This is typically already an object that inherits from \code{XMLNode}
or a string.}
}
\value{
An object of class XMLNode.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlNode}}
\code{\link{xmlTextNode}}
}
\examples{
# creates an XMLTextNode.
asXMLNode("a text node")
# unaltered.
asXMLNode(xmlNode("p"))
}
\keyword{file}
XML/man/xmlTree.Rd 0000644 0001760 0000144 00000013434 12665242441 013437 0 ustar ripley users \name{xmlTree}
\alias{xmlTree}
\title{An internal, updatable DOM object for building XML trees}
\description{
This is a mutable object (implemented via a closure)
for representing an XML tree, in the same
spirit as \code{\link{xmlOutputBuffer}}
and \code{\link{xmlOutputDOM}}
but that uses the internal structures of
libxml.
This can be used to create a DOM that can be
constructed in R and exported to another system
such as XSLT (\url{http://www.omegahat.net/Sxslt})
}
\usage{
xmlTree(tag, attrs = NULL, dtd=NULL, namespaces=list(),
doc = newXMLDoc(dtd, namespaces))
}
\arguments{
\item{tag}{the node or element name to use to create the new top-level node in the tree
or alternatively, an \code{XMLInternalNode} that was already
created.
This is optional. If it is not specified, no top-most node is
created but can be added using \code{addNode}.
If a top-level tag is added in the call to
\code{xmlTree}, that becomes the currently active or open
node (e.g. same as \code{addNode( ..., close = FALSE)})
and nodes subsequently added to this tree become its children.
}
\item{attrs}{attributes for the top-level node, in the form of a named
character vector.}
\item{dtd}{the name of the external DTD for this document.
If specified, this adds the DOCTYPE node to the resulting document.
This can be a node created earlier with a call to
\code{\link{newXMLDTDNode}}, or alternatively it can be a
character vector with 1, 2 or 3 elements
giving the name of the top-level node, and the public identifier
and the system identifier for the DTD in that order.
}
\item{namespaces}{a named character vector with each element giving the name space identifier and the
corresponding URI, \\
e.g \code{c(shelp = "http://www.omegahat.net/XML/SHelp")}
If \code{tag} is specified as a character vector, these name spaces
are defined within that new node.
}
\item{doc}{an internal XML document object, typically created with
\code{\link{newXMLDoc}}. This is used as the host document for all
the new nodes that will be created as part of this document.
If one wants to create nodes without an internal document ancestor,
one can alternatively specify this as \code{NULL}.}
}
\details{
This creates a collection of functions that manipulate a shared
state to build and maintain an XML tree in C-level code.
}
\value{
An object of class
\code{XMLInternalDOM}
that extends \code{XMLOutputStream}
and has the same interface (i.e. ``methods'') as
\code{\link{xmlOutputBuffer}}
and \code{\link{xmlOutputDOM}}.
Each object has methods for
adding a new XML tag,
closing a tag, adding an XML comment,
and retrieving the contents of the tree.
\item{addTag}{create a new tag at the current position,
optionally leaving it as the active open tag to which
new nodes will be added as children}
\item{closeTag}{close the currently active tag
making its parent the active element into
which new nodes will be added.}
\item{addComment}{add an XML comment node
as a child of the active node in the document.}
\item{value}{retrieve an object representing the
XML tree. See \code{\link{saveXML}} to serialize the
contents of the tree.}
\item{add}{degenerate method in this context.}
}
\references{\url{http://www.w3.org/XML}, \url{http://www.xmlsoft.org},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\note{This is an early version of this function and I need to iron out some
of the minor details.}
\seealso{
\code{\link{saveXML}}
\code{\link{newXMLDoc}}
\code{\link{newXMLNode}}
\code{\link{xmlOutputBuffer}}
\code{\link{xmlOutputDOM}}
}
\examples{
z = xmlTree("people", namespaces = list(r = "http://www.r-project.org"))
z$setNamespace("r")
z$addNode("person", attrs = c(id = "123"), close = FALSE)
z$addNode("firstname", "Duncan")
z$addNode("surname", "Temple Lang")
z$addNode("title", "Associate Professor")
z$addNode("expertize", close = FALSE)
z$addNode("topic", "Data Technologies")
z$addNode("topic", "Programming Language Design")
z$addNode("topic", "Parallel Computing")
z$addNode("topic", "Data Visualization")
z$addNode("topic", "Meta-Computing")
z$addNode("topic", "Inter-system interfaces")
z$closeTag()
z$addNode("address", "4210 Mathematical Sciences Building, UC Davis")
z$closeTag()
tr <- xmlTree("CDataTest")
tr$addTag("top", close=FALSE)
tr$addCData("x <- list(1, a='&');\nx[[2]]")
tr$addPI("S", "plot(1:10)")
tr$closeTag()
cat(saveXML(tr$value()))
f = tempfile()
saveXML(tr, f, encoding = "UTF-8")
# Creating a node
x = rnorm(3)
z = xmlTree("r:data", namespaces = c(r = "http://www.r-project.org"))
z$addNode("numeric", attrs = c("r:length" = length(x)))
# shows namespace prefix on an attribute, and different from the one on the node.
z = xmlTree()
z$addNode("r:data", namespace = c(r = "http://www.r-project.org",
omg = "http://www.omegahat.net"),
close = FALSE)
x = rnorm(3)
z$addNode("r:numeric", attrs = c("omg:length" = length(x)))
z = xmlTree("examples")
z$addNode("example", namespace = list(r = "http://www.r-project.org"), close = FALSE)
z$addNode("code", "mean(rnorm(100))", namespace = "r")
x = summary(rnorm(1000))
d = xmlTree()
d$addNode("table", close = FALSE)
d$addNode("tr", .children = sapply(names(x), function(x) d$addNode("th", x)))
d$addNode("tr", .children = sapply(x, function(x) d$addNode("td", format(x))))
d$closeNode()
cat(saveXML(d))
# Dealing with DTDs and system and public identifiers for DTDs.
# Just doctype
za = xmlTree("people", dtd = "people")
# no public element
zb = xmlTree("people",
dtd = c("people", "", "http://www.omegahat.net/XML/types.dtd"))
# public and system
zc = xmlTree("people",
dtd = c("people", "//a//b//c//d",
"http://www.omegahat.net/XML/types.dtd"))
}
\keyword{IO}
XML/man/xmlContainsEntity.Rd 0000644 0001760 0000144 00000002317 12665242441 015511 0 ustar ripley users \name{xmlContainsEntity}
\alias{xmlContainsEntity}
\alias{xmlContainsElement}
\title{Checks if an entity is defined within a DTD.}
\description{
A DTD contains entity and element definitions.
These functions test whether a DTD contains a definition
for a particular named element or entity.
}
\usage{
xmlContainsEntity(name, dtd)
xmlContainsElement(name, dtd)
}
\arguments{
\item{name}{ The name of the element
or entity being queried.}
\item{dtd}{ The DTD in which to search for the entry.}
}
\details{
See \code{\link{parseDTD}} for more information about
DTDs, entities and elements.
}
\value{
A logical value indicating whether the entry
was found in the appropriate list of
entity or element definitions.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{ \code{\link{parseDTD}},
\code{\link{dtdEntity}},
\code{\link{dtdElement}}
}
\examples{
dtdFile <- system.file("exampleData", "foo.dtd", package="XML")
foo.dtd <- parseDTD(dtdFile)
# Look for entities.
xmlContainsEntity("foo", foo.dtd)
xmlContainsEntity("bar", foo.dtd)
# Now look for an element
xmlContainsElement("record", foo.dtd)
}
\keyword{file}
XML/man/xmlRoot.Rd 0000644 0001760 0000144 00000004566 12665242441 013471 0 ustar ripley users \name{xmlRoot}
\alias{xmlRoot}
\alias{xmlRoot.XMLDocument}
\alias{xmlRoot.XMLInternalDocument}
\alias{xmlRoot.XMLInternalDOM}
\alias{xmlRoot.XMLDocumentRoot}
\alias{xmlRoot.XMLDocumentContent}
\alias{xmlRoot.HTMLDocument}
\title{Get the top-level XML node.}
\description{
These are a collection of methods for providing easy access to the
top-level \code{XMLNode} object resulting from parsing an XML
document. They simplify accessing this node in the presence of
auxiliary information such as DTDs, file name and version information
that is returned as part of the parsing.
}
\usage{
xmlRoot(x, skip = TRUE, ...)
\method{xmlRoot}{XMLDocumentContent}(x, skip = TRUE, ...)
\method{xmlRoot}{XMLInternalDocument}(x, skip = TRUE, addFinalizer = NA, ...)
\method{xmlRoot}{HTMLDocument}(x, skip = TRUE, ...)
}
\arguments{
\item{x}{the object whose root/top-level XML node is to be returned.}
\item{skip}{a logical value that controls whether DTD nodes and/or
XMLComment objects that appear
before the \dQuote{real} top-level node of the document should be ignored (\code{TRUE})
or not (\code{FALSE}) when returning the root node.}
\item{...}{arguments that are passed by the generic to the different specialized
methods of this generic.}
\item{addFinalizer}{a logical value or identifier for a C routine
that controls whether we register finalizers on the internal node.}
}
\value{
An object of class \code{XMLNode}.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\note{One cannot obtain the parent or top-level node
of an XMLNode object in S. This is different from
languages like C, Java, Perl, etc. and is primarily
because S does not provide support for references.}
\seealso{
\code{\link{xmlTreeParse}}
\code{\link{[[.XMLNode}}
}
\examples{
doc <- xmlTreeParse(system.file("exampleData", "mtcars.xml", package="XML"))
xmlRoot(doc)
# Note that we cannot use getSibling () on a regular R-level XMLNode object
# since we cannot go back up or across the tree from that node, but
# only down to the children.
# Using an internal node via xmlParse (== xmlInternalTreeParse())
doc <- xmlParse(system.file("exampleData", "mtcars.xml", package="XML"))
n = xmlRoot(doc, skip = FALSE)
# skip over the DTD and the comment
d = getSibling(getSibling(n))
}
\keyword{file}
XML/man/SAXState-class.Rd 0000644 0001760 0000144 00000007170 12160531335 014547 0 ustar ripley users \name{SAXState-class}
\docType{class}
\alias{SAXState-class}
\title{A virtual base class defining methods for SAX parsing}
\description{
This is a degenerate virtual class which others are
expected to sub-class when they want to
use S4 methods as handler functions for SAX-based XML parsing.
The idea is that one can pass both i) a collection of handlers
to \code{\link{xmlEventParse}} which are simply
the generic functions for the different SAX actions,
and ii) a suitable object to maintain state across
the different SAX calls.
This is used to perform the method dispatching to get
the appropriate behavior for the action.
Each of these methods is expected to return the
updated state object and the SAX parser
will pass this in the next callback.
We define this class here so that we can provide
default methods for each of the different handler
actions. This allows other programmers to define
new classes to maintain state that are sub-class
of \code{SAXState} and then they do not have to
implement methods for each of the
different handlers.
}
\section{Objects from the Class}{A virtual Class: No objects may be created from it.}
\section{Methods}{
\describe{
\item{comment.SAX}{\code{signature(content = "ANY", .state = "SAXState")}: ... }
\item{endElement.SAX}{\code{signature(name = "ANY", .state = "SAXState")}: ... }
\item{entityDeclaration.SAX}{\code{signature(name = "ANY", base = "ANY", sysId = "ANY", publicId = "ANY", notationName = "ANY", .state = "SAXState")}: ... }
\item{processingInstruction.SAX}{\code{signature(target = "ANY", content = "ANY", .state = "SAXState")}: ... }
\item{startElement.SAX}{\code{signature(name = "ANY", atts = "ANY", .state = "SAXState")}: ... }
\item{text.SAX}{\code{signature(content = "ANY", .state = "SAXState")}: ... }
}
}
\references{\url{http://www.w3.org/XML}, \url{http://www.xmlsoft.org}}
\author{Duncan Temple Lang}
\seealso{
\code{\link{xmlEventParse}}
}
\examples{
# For each element in the document, grab the node name
# and increment the count in a vector for this name.
# We define an S4 class named ElementNameCounter which
# holds the vector of frequency counts for the node names.
setClass("ElementNameCounter",
representation(elements = "integer"), contains = "SAXState")
# Define a method for handling the opening/start of any XML node
# in the SAX streams.
setMethod("startElement.SAX", c(.state = "ElementNameCounter"),
function(name, atts, .state = NULL) {
if(name \%in\% names(.state@elements))
.state@elements[name] = as.integer(.state@elements[name] + 1)
else
.state@elements[name] = as.integer(1)
.state
})
filename = system.file("exampleData", "eurofxref-hist.xml.gz", package = "XML")
# Parse the file, arranging to have our startElement.SAX method invoked.
z = xmlEventParse(filename, genericSAXHandlers(),
state = new("ElementNameCounter"), addContext = FALSE)
z@elements
# Get the contents of all the comments in a character vector.
setClass("MySAXState",
representation(comments = "character"), contains = "SAXState")
setMethod("comment.SAX", c(.state = "MySAXState"),
function(content, .state = NULL) {
cat("comment.SAX called for MySAXState\n")
.state@comments <- c(.state@comments, content)
.state
})
filename = system.file("exampleData", "charts.svg", package = "XML")
st = new("MySAXState")
z = xmlEventParse(filename, genericSAXHandlers(useDotNames = TRUE), state = st)
z@comments
}
\keyword{classes}
XML/man/xmlName.Rd 0000644 0001760 0000144 00000002656 12665242441 013424 0 ustar ripley users \name{xmlName}
\alias{xmlName}
\alias{xmlName<-}
\alias{xmlName.XMLComment}
\alias{xmlName.XMLNode}
\alias{xmlName.XMLInternalNode}
\title{ Extracts the tag name of an XMLNode object.}
\description{
Each XMLNode object has an element or tag name introduced
in the \code{<name ...>} entry in an XML document.
This function returns that name.
We can also set that name using \code{xmlName(node) <- "name"}
and the value can have an XML name space prefix, e.g.
\code{"r:name"}.
}
\usage{
xmlName(node, full = FALSE)
}
\arguments{
\item{node}{The XMLNode object whose tag name is being requested.}
\item{full}{a logical value indicating whether to prepend the
namespace prefix, if there is one, or return just the
name of the XML element/node. \code{TRUE} means prepend the prefix.}
}
\value{
A character vector of length 1
which is the \code{node$name} entry.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlChildren}},
\code{\link{xmlAttrs}},
\code{\link{xmlTreeParse}}
}
\examples{
fileName <- system.file("exampleData", "test.xml", package="XML")
doc <- xmlTreeParse(fileName)
xmlName(xmlRoot(doc)[[1]])
tt = xmlRoot(doc)[[1]]
xmlName(tt)
xmlName(tt) <- "bob"
# We can set the node on an internal object also.
n = newXMLNode("x")
xmlName(n)
xmlName(n) <- "y"
xmlName(n) <- "r:y"
}
\keyword{file}
XML/man/getChildrenStrings.Rd 0000644 0001760 0000144 00000003266 12160531705 015615 0 ustar ripley users \name{getChildrenStrings}
\alias{getChildrenStrings}
\title{Get the individual text content of each child node}
\description{
This function returns the text content of each child of the given node
as a separate string.
This is different from \code{xmlValue} applied to the node.
That concatenates all of the text in the child nodes (and their descendants).
This is a faster version of \code{xmlSApply(node, xmlValue)}.
}
\usage{
getChildrenStrings(node, encoding = getEncoding(node),
asVector = TRUE, len = xmlSize(node), addNames = TRUE)
}
\arguments{
\item{node}{the parent node whose child nodes we want to process}
\item{encoding}{the encoding to use for the text. This should come
from the document itself. However, it can be useful to specify it if
the encoding has not been set for the document (e.g. if we are
constructing it node-by-node).}
\item{asVector}{a logical value that controls whether the result is
returned as a character vector or as a list (\code{FALSE}).
}
\item{len}{an integer giving the number of elements we expect
returned. This is best left unspecified but can be provided if the
caller already knows the number of child nodes. This avoids
recomputing this and so provides a marginal speedup.}
\item{addNames}{a logical value that controls whether we add the
element names to each element of the resulting vector. This makes it
easier to identify from which element each string came.}
}
\value{
A character vector.
}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{xmlValue}}
}
\examples{
doc = xmlParse("a string some text another")
getChildrenStrings(xmlRoot(doc))
doc = xmlParse("a string some text anotherabcxyz")
getChildrenStrings(xmlRoot(doc))
}
\keyword{programming}
XML/man/xmlSource.Rd 0000644 0001760 0000144 00000025467 12144542232 014002 0 ustar ripley users \name{xmlSource}
\alias{xmlSource}
\alias{xmlSource,character-method}
\alias{xmlSource,XMLNodeSet-method}
\alias{xmlSource,XMLInternalDocument-method}
\alias{xmlSourceFunctions}
\alias{xmlSourceFunctions,character-method}
\alias{xmlSourceFunctions,XMLInternalDocument-method}
\alias{xmlSourceSection}
\alias{xmlSourceSection,character-method}
\alias{xmlSourceSection,XMLInternalDocument-method}
\alias{xmlSourceThread}
\alias{xmlSourceThread,XMLInternalDocument-method}
\alias{xmlSourceThread,character-method}
\alias{xmlSourceThread,list-method}
\title{Source the R code, examples, etc. from an XML document}
\description{
This is the equivalent of a smart \code{\link[base]{source}}
for extracting the R code elements from an XML document and
evaluating them. This allows for a \dQuote{simple} way to collect
R functions definitions or a sequence of (annotated) R code segments in an XML
document along with other material such as notes, documentation,
data, FAQ entries, etc., and still be able to
access the R code directly from within an R session.
The approach enables one to use the XML document as a container for
a heterogeneous collection of related material, some of which
is R code.
In the literate programming parlance, this function essentially
dynamically "tangles" the document within R, but can work on
small subsets of it that are easily specified in the
\code{xmlSource} function call.
This is a convenient way to annotate code in a rich way
and work with source files in a new and potentially more effective
manner.
\code{xmlSourceFunctions} provides a convenient way to read only
the function definitions, i.e. the \code{<r:function>} nodes.
We can restrict to a subset by specifying the node ids of interest.
\code{xmlSourceSection} allows us to evaluate the code in one or more
specific sections.
This style of authoring code supports mixed language support
in which we put, for example, C and R code together in the same
document.
Indeed, one can use the document to store arbitrary content
and still retrieve the R code. The more structure there is,
the easier it is to create tools to extract that information
using XPath expressions.
We can identify individual \code{r:code} nodes in the document to
process, i.e. evaluate. We do this using their \code{id} attribute
and specifying which to process via the \code{ids} argument.
Alternatively, if a document has a node \code{r:codeIds} as a child of
the top-level node (or within an invisible node), we read its contents as a sequence of line
separated \code{id} values as if they had been specified via the
argument \code{ids} to this function.
We can also use XSL to extract the code. See \code{getCode.xsl}
in the Omegahat XSL collection.
This particular version (as opposed to other implementations) uses
XPath to conveniently find the nodes of interest.
}
\usage{
xmlSource(url, ...,
envir = globalenv(),
xpath = character(),
ids = character(),
omit = character(),
ask = FALSE,
example = NA,
fatal = TRUE, verbose = TRUE, echo = verbose, print = echo,
xnodes = DefaultXMLSourceXPath,
namespaces = DefaultXPathNamespaces, section = character(),
eval = TRUE, init = TRUE, setNodeNames = FALSE, parse = TRUE,
force = FALSE)
xmlSourceFunctions(doc, ids = character(), parse = TRUE, ...)
xmlSourceSection(doc, ids = character(),
xnodes = c(".//r:function", ".//r:init[not(@eval='false')]",
".//r:code[not(@eval='false')]",
".//r:plot[not(@eval='false')]"),
namespaces = DefaultXPathNamespaces, ...)
}
\arguments{
\item{url}{the name of the file, URL containing the XML document, or
an XML string. This is passed to \code{\link[XML]{xmlTreeParse}}
which is called with \code{useInternalNodes = TRUE}.
}
\item{\dots}{additional arguments passed to \code{\link[XML]{xmlTreeParse}}}
\item{envir}{the environment in which the code elements of the XML
document are to be evaluated. By default, they are evaluated
in the global environment so that assignments take place there.
}
\item{xpath}{a string giving an XPath expression which is used after
parsing the document to filter the document to a particular subset of
nodes. This allows one to restrict the evaluation to a subset of
the original document. One can do this directly by
parsing the XML document, applying the XPath query and then passing
the resulting node set to this \code{xmlSource} function's
appropriate method. This argument merely allows for a more
convenient form of those steps, collapsing it into one action.
}
\item{ids}{a character vector. XML nodes containing R code
(e.g. \code{r:code}, \code{r:init}, \code{r:function},
\code{r:plot}) can have an id attribute. This vector
allows the caller to specify the subset of these nodes
to process, i.e. whose code will be evaluated.
The order is currently not important. It may be used
in the future to specify the order in which the nodes are evaluated.
If this is not specified and the document has a node
\code{r:codeIds} as an immediate child of the top-most node,
the contents of this node or contained within an \code{invisible}
node (so that it doesn't have to be filtered when rendering the
document), the names of the r:code id values to process are taken
as the individual lines from the body of this node.
}
\item{omit}{a character vector. The values of the id attributes of the
nodes that we want to skip or omit from the evaluation. This allows
us to specify the set that we don't want evaluated, in contrast to the
\code{ids} argument.
The order is not important.
}
\item{ask}{logical}
\item{example}{a character or numeric vector specifying the values of the id
attributes of any \code{r:example} nodes in the document.
A single document may contain numerous, separate examples
and these can be marked uniquely using an \code{id} attribute,
e.g. \code{}
\seealso{
\code{\link[XML]{xmlTreeParse}}
}
\examples{
xmlSource(system.file("exampleData", "Rsource.xml", package="XML"))
# This illustrates using r:frag nodes.
# The r:frag nodes are not processed directly, but only
# if referenced in the contents/body of a r:code node
f = system.file("exampleData", "Rref.xml", package="XML")
xmlSource(f)
}
\keyword{IO}
\keyword{programming}
\concept{Annotated code}
\concept{Literate Programming}
\concept{Mixed language}
XML/man/xmlSchemaValidate.Rd 0000644 0001760 0000144 00000004652 11741563530 015413 0 ustar ripley users \name{xmlSchemaValidate}
\alias{xmlSchemaValidate}
\alias{schemaValidationErrorHandler}
\title{Validate an XML document relative to an XML schema}
\description{
This function validates an XML document relative to an
XML schema to ensure that it has the correct structure,
i.e. valid sub-nodes, attributes, etc.
The \code{schemaValidationErrorHandler} is a function
that returns a list of functions which can be used to cumulate or
collect the errors and warnings from the schema validation operation.
}
\usage{
xmlSchemaValidate(schema, doc,
errorHandler = xmlErrorFun(),
options = 0L)
schemaValidationErrorHandler()
}
\arguments{
\item{schema}{an object of class \code{xmlSchemaRef} which is
usually the result of a call to \code{\link{xmlInternalTreeParse}}
with \code{isSchema = TRUE}, or \code{\link{xmlSchemaParse}}.}
\item{doc}{an XML document which has already been parsed into
a \code{XMLInternalDocument} or which is a file name or string
which is coerced to an \code{\link{XMLInternalDocument-class}} object}
\item{options}{an integer giving the options controlling the
validation. At present, this is either 0 or 1 and is essentially
irrelevant to us. It may be of value in the future.
}
\item{errorHandler}{ a function or a list whose first element is a function
which is then used as the collector for the warning and error
messages reported during the validation. For each warning or error,
this function is invoked and the class of the message is either
\code{XMLSchemaWarning} or \code{XMLSchemaError} respectively.
}
}
\value{
Typically, a list with 3 elements:
\item{status}{0 for validated, and non-zero for invalid}
\item{errors}{a character vector}
\item{warnings}{a character vector}
If an empty error handler is provided (i.e. \code{NULL})
just an integer indicating the status of the validation
is returned. 0 indicates everything was okay; a non-zero
value indicates a validation error. (-1 indicates an internal error
in libxml2)
}
\references{libxml2 \url{http://www.xmlsoft.org}}
\seealso{
\code{\link{xmlSchemaParse}}
}
\examples{
if(FALSE) {
xsd = xmlParse(system.file("exampleData", "author.xsd", package = "XML"), isSchema =TRUE)
doc = xmlInternalTreeParse(system.file("exampleData", "author.xml", package = "XML"))
xmlSchemaValidate(xsd, doc)
}
}
\keyword{IO}
\concept{validation}
\concept{XML}
\concept{schema}
XML/man/asXMLTreeNode.Rd 0000644 0001760 0000144 00000003356 13610031125 014415 0 ustar ripley users \name{asXMLTreeNode}
\alias{asXMLTreeNode}
\title{Convert a regular XML node to one for use in a "flat" tree}
\description{
This coerces a regular R-based XML node (i.e. not an internal C-level
node) to a form that can be inserted into a flat tree, i.e.
one that stores the nodes in a non-hierarchical manner.
It is thus used in conjunction with
\code{\link{xmlHashTree}}
%% and \code{\link{xmlFlatListTree}}.
It adds \code{id} and \code{env} fields to the
node and specializes the class by prefixing \code{className}
to the class attribute.
This is not used very much anymore as we use the internal nodes for
most purposes.
}
\usage{
asXMLTreeNode(node, env, id = get(".nodeIdGenerator", env)(xmlName(node)),
className = "XMLTreeNode")
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{node}{the original XML node}
\item{env}{the \code{XMLFlatTree} object into which this node will be inserted.}
\item{id}{the identifier for the node in the flat tree. If this is not
specified, we consult the tree itself and its built-in identifier
generator. By default, the name of the node is used as its
identifier unless there is another node with that name.
}
\item{className}{a vector of class names to be prefixed to the
existing class vector of the node.}
}
\value{
An object of class \code{className}, i.e. by default
\code{"XMLTreeNode"}.
}
\references{\url{http://www.w3.org/XML}}
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlHashTree}}
%% \code{\link{xmlFlatListTree}}
}
\examples{
txt = 'some textother text'
doc = xmlTreeParse(txt)
class(xmlRoot(doc))
as(xmlRoot(doc), "XMLInternalNode")
}
\keyword{IO}
\concept{XML}
XML/man/compareXMLDocs.Rd 0000644 0001760 0000144 00000003177 12030205125 014622 0 ustar ripley users \name{compareXMLDocs}
\alias{compareXMLDocs}
\title{Indicate differences between two XML documents}
\description{
This function is an attempt to provide some assistance
in determining if two XML documents are the same and if
not, how they differ. Rather than comparing
the tree structure, this function compares
the frequency distributions of the names of the
nodes. It omits position, attributes, simple content
from the comparison. Those are left to the functions
that have more contextual information to compare two documents.
}
\usage{
compareXMLDocs(a, b, ...)
}
\arguments{
\item{a,b}{two parsed XML documents that must be internal documents, i.e. created with
\code{\link{xmlParse}} or created with \code{\link{newXMLNode}}.}
\item{\dots}{additional parameters that are passed on to the \code{summary} method for an internal document.}
}
\value{
A list with elements
\item{inA}{the names and counts of the XML elements that only appear in the first document}
\item{inB}{the names and counts of the XML elements that only appear in the second document}
\item{countDiffs}{a vector giving the difference in number of nodes with a particular name.}
These give a description of what is missing from one document relative to the other.
}
%\references{}
\author{
Duncan Temple Lang
}
\seealso{
\code{\link{getNodeSet}}
}
\examples{
tt =
'
text
a phrase
'
a = xmlParse(tt, asText = TRUE)
b = xmlParse(tt, asText = TRUE)
d = getNodeSet(b, "//d")[[1]]
xmlName(d) = "bob"
addSibling(xmlParent(d), newXMLNode("c"))
compareXMLDocs(a, b)
}
\keyword{IO}
XML/man/dtdElement.Rd 0000644 0001760 0000144 00000003742 12665242441 014105 0 ustar ripley users \name{dtdElement}
\alias{dtdElement}
\alias{dtdEntity}
\title{Gets the definition of an element or entity from a DTD.}
\description{
A DTD in R consists of both element and entity definitions.
These two functions provide simple access to
individual elements of these two lists, using the name
of the element or entity.
The DTD is provided to determine where to look for the
entry.
}
\usage{
dtdElement(name,dtd)
dtdEntity(name,dtd)
}
\arguments{
\item{name}{The name of the element being retrieved/accessed.}
\item{dtd}{The DTD from which the element is to be retrieved.}
}
\details{
An element within a DTD contains
both the list of sub-elements it can contain and a list of attributes
that can be used within this tag type.
\code{dtdElement} retrieves the
element by name from the specified DTD definition.
Entities within a DTD are like macros or text substitutes used
within a DTD and/or XML documents that use it.
Each consists of a name/label and a definition, the text
that is substituted when the entity is referenced.
\code{dtdEntity} retrieves the entity definition
from the DTD.
\\
One can read a DTD
directly (using \code{\link{parseDTD}}) or implicitly when reading a
document (using \code{\link{xmlTreeParse}}).
The names of all available elements can be obtained from the expression
\code{names(dtd$elements)}.
This function is simply a convenience for
indexing this \code{elements} list.
}
\value{
An object of class \code{XMLElementDef}.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{parseDTD}},
\code{\link{dtdValidElement}}
}
\examples{
dtdFile <- system.file("exampleData","foo.dtd", package="XML")
foo.dtd <- parseDTD(dtdFile)
# Get the definitions of the `variable' and `entry1' elements
tmp <- dtdElement("variable", foo.dtd)
xmlAttrs(tmp)
tmp <- dtdElement("entry1", foo.dtd)
# Get the definition of the `img' entity
dtdEntity("img", foo.dtd)
}
\keyword{file}
XML/man/xmlValue.Rd 0000644 0001760 0000144 00000004445 12665242441 013616 0 ustar ripley users \name{xmlValue}
\alias{xmlValue}
\alias{xmlValue.XMLCDataNode}
\alias{xmlValue.XMLNode}
\alias{xmlValue.XMLProcessingInstruction}
\alias{xmlValue.XMLTextNode}
\alias{xmlValue.XMLComment}
\alias{xmlValue<-}
\alias{xmlValue<-,XMLAbstractNode-method}
\alias{xmlValue<-,XMLInternalTextNode-method}
\alias{xmlValue<-,XMLTextNode-method}
\alias{coerce,XMLInternalTextNode,character-method}
\title{Extract or set the contents of a leaf XML node}
\description{
Some types of XML nodes have no child nodes, but are leaf nodes and
simply contain text. Examples are \code{XMLTextNode}, \code{XMLProcessingInstruction}.
This function provides access to their raw contents.
This has been extended to operate recursively on arbitrary XML nodes
that contain a single text node.
}
\usage{
xmlValue(x, ignoreComments = FALSE, recursive = TRUE,
encoding = getEncoding(x), trim = FALSE)
}
\arguments{
\item{x}{the \code{XMLNode} object whose
contents are to be returned.}
\item{ignoreComments}{a logical value which, if \code{TRUE},
excludes the text in XML comment nodes from the result.
If this is \code{FALSE}, the text in the comments is part
of the return value.
}
\item{recursive}{a logical value indicating whether to process all
sub-nodes (\code{TRUE}) or only the text nodes within the node \code{x}.
}
%XXX
\item{encoding}{experimental functionality and parameter related to
encoding.}
\item{trim}{a logical value controlling whether we remove leading or
trailing white space when returning the string value}
}
\value{
The object stored in the
\code{value} slot of the \code{XMLNode} object.
This is typically a string.
}
\references{\url{http://www.w3.org/XML}, \url{http://www.jclark.com/xml},
\url{http://www.omegahat.net} }
\author{ Duncan Temple Lang }
\seealso{
\code{\link{xmlChildren}}
\code{\link{xmlName}}
\code{\link{xmlAttrs}}
\code{\link{xmlNamespace}}
}
\examples{
node <- xmlNode("foo", "Some text")
xmlValue(node)
xmlValue(xmlTextNode("some more raw text"))
# Setting the xmlValue().
a = newXMLNode("a")
xmlValue(a) = "the text"
xmlValue(a) = "different text"
a = newXMLNode("x", "bob")
xmlValue(a) = "joe"
b = xmlNode("bob")
xmlValue(b) = "Foo"
xmlValue(b) = "again"
b = newXMLNode("bob", "some text")
xmlValue(b[[1]]) = "change"
b
}
\keyword{file}
XML/man/xmlAttrs.Rd 0000644 0001760 0000144 00000005733 12160564432 013635 0 ustar ripley users \name{xmlAttrs}
\alias{xmlAttrs}
\alias{xmlAttrs<-}
\alias{xmlAttrs.XMLElementDef}
\alias{xmlAttrs<-,XMLNode}
\alias{xmlAttrs<-,XMLInternalNode}
\alias{xmlAttrs<-,XMLNode-method}
\alias{xmlAttrs<-,XMLInternalElementNode-method}
\alias{xmlAttrs.XMLNode}
\alias{xmlAttrs.XMLInternalNode}
\title{ Get the list of attributes of an XML node. }
\description{
This returns a named character vector giving the
name-value pairs of attributes of an XMLNode object which is part of
an XML document.
}
\usage{
xmlAttrs(node, ...)
'xmlAttrs<-'(node, append = TRUE, suppressNamespaceWarning =
getOption("suppressXMLNamespaceWarning", FALSE), value)
}
\arguments{
\item{node}{The \code{XMLNode} object
whose attributes are to be extracted. }
\item{append}{a logical value indicating whether to add the attributes in \code{value} to the existing attributes
within the XML node, or to replace the set of any existing attributes with this new set, i.e. remove the existing ones and
then set the attributes with the contents of \code{value}.}
\item{...}{additional arguments for the specific methods. For XML
internal nodes, these are \code{addNamespacePrefix} and
\code{addNamespaceURLs}. These are both logical values and indicate
whether to prepend the name of the attribute with the namespace
prefix and also whether to return the namespace prefix and URL as a
vector in the \code{namespaces} attribute.}
\item{value}{a named character vector giving the new attributes to be
added to the node.}
\item{suppressNamespaceWarning}{see \code{\link{addChildren}}}
}
\value{
A named character vector, where the names
are the attribute names and the
elements are the corresponding values.
This corresponds to the (attr, "value")
pairs in the XML tag
\code{
')
xmlAttrs(xmlRoot(doc)[[1]], TRUE, TRUE)
xmlAttrs(xmlRoot(doc)[[1]], FALSE, TRUE)
xmlAttrs(xmlRoot(doc)[[1]], TRUE, FALSE)
xmlAttrs(xmlRoot(doc)[[1]], FALSE, FALSE)
}
\keyword{IO}
\keyword{file}
XML/man/ensureNamespace.Rd 0000644 0001760 0000144 00000003172 11741563530 015132 0 ustar ripley users \name{ensureNamespace}
\alias{ensureNamespace}
\title{Ensure that the node has a definition for particular XML namespaces}
\description{
This function is a helper function for use in creating XML content.
We often want to create a node that will be part of a larger XML tree
and use a particular namespace for that node name. Rather than
defining the namespace in each new node, we want to ensure that
it is defined on an ancestor node. This function aids in that task.
We call the function with the ancestor node or top-level document
and have it check whether the namespace is already defined or have
it add it to the node and return.
This is intended for use with \code{XMLInternalNode} objects
which are directly mutable (rather than changing a copy of the node
and having to insert that back into the larger tree.)
}
\usage{
ensureNamespace(doc, what)
}
\arguments{
\item{doc}{an \code{XMLInternalDocument} or \code{XMLInternalNode} on
which the namespace is to be defined. If this is a document, we use
the root node.}
\item{what}{a named character vector giving the URIs for the namespace
definitions and the names giving the desired prefixes}
}
\value{
This is used for the potential side effects of modifying the
XML node to add (some of) the namespaces as needed.
}
\references{XML namespaces}
\author{Duncan Temple Lang}
\seealso{
\code{\link{newXMLNamespace}}
\code{\link{newXMLNode}}
}
\examples{
doc = newXMLDoc()
top = newXMLNode("article", doc = doc)
ensureNamespace(top, c(r = "http://www.r-project.org"))
b = newXMLNode("r:code", parent = top)
print(doc)
}
\keyword{IO}
\concept{XML}
XML/man/processXInclude.Rd 0000644 0001760 0000144 00000003754 11741563530 015134 0 ustar ripley users \name{processXInclude}
\alias{processXInclude}
\alias{processXInclude.list}
\alias{processXInclude.XMLInternalDocument}
\alias{processXInclude.XMLInternalElement}
\title{Perform the XInclude substitutions}
\description{
This function and its methods process the XInclude directives
within the document of the form \code{