")
children <- xml_children(x)
x <- xml_find_first(children, ".//b|.//i")
expect_equal(xml_name(x[[1]]), "b")
expect_equal(xml_name(x[[2]]), "i")
expect_equal(xml_name(x[[3]]), NA_character_)
expect_equal(xml_name(x), c("b", "i", NA_character_))
})
test_that("qualified names returned when ns given", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)
bars <- xml_children(xml_children(x))
expect_equal(xml_name(bars), c("bar", "bar"))
expect_equal(xml_name(bars, ns), c("d1:bar", "d2:bar"))
})
test_that("error if missing ns spec", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)[1]
bars <- xml_children(xml_children(x))
expect_snapshot(error = TRUE, xml_name(bars, ns))
})
test_that("xml_name<- modifies the name", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)
bars <- xml_children(xml_children(x))
bar <- bars[[1]]
xml_name(bar) <- "foo"
expect_equal(xml_name(bar), "foo")
expect_equal(xml_name(bar, ns), "d1:foo")
# ns is ignored
xml_name(bar, ns) <- "bar"
expect_equal(xml_name(bar), "bar")
expect_equal(xml_name(bar, ns), "d1:bar")
xml_name(bars) <- "foo"
expect_equal(xml_name(bars), c("foo", "foo"))
old_mss <- mss <- xml_missing()
xml_name(mss) <- "foo"
expect_identical(old_mss, mss)
})
test_that("xml_set_name modifies the name", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)
bars <- xml_children(xml_children(x))
bar <- bars[[1]]
xml_set_name(bar, "foo")
expect_equal(xml_name(bar), "foo")
expect_equal(xml_name(bar, ns), "d1:foo")
# ns is ignored
xml_set_name(bar, "bar", ns)
expect_equal(xml_name(bar), "bar")
expect_equal(xml_name(bar, ns), "d1:bar")
xml_set_name(bars, "foo")
expect_equal(xml_name(bars), c("foo", "foo"))
old_mss <- mss <- xml_missing()
xml_set_name(mss, "foo")
expect_identical(old_mss, mss)
})
xml2/tests/testthat/test-format.R 0000644 0001762 0000144 00000001167 14727652467 016557 0 ustar ligges users test_that("format.xml_node prints attributes for root nodes", {
x <- read_xml("")
expect_equal(format(x), "")
})
test_that("format.xml_node prints namespaces for root nodes", {
x <- read_xml("")
expect_equal(format(x), "")
y <- read_xml("")
expect_equal(format(y), "")
z <- read_xml("")
expect_equal(format(z), "")
})
xml2/tests/testthat/records.xml 0000644 0001762 0000144 00000000217 14727652467 016345 0 ustar ligges users
&hoqc;
xml2/tests/testthat/test-xml_attr.R 0000644 0001762 0000144 00000016275 14727652467 017127 0 ustar ligges users test_that("missing attributes returned as NA by default", {
x <- read_xml("")
expect_equal(xml_attr(x, "id"), NA_character_)
})
test_that("missing attributes returned as NA", {
x <- read_xml("")
expect_equal(xml_attr(x, "id", default = 1), "1")
})
test_that("attributes are correctly found", {
x <- read_xml("")
expect_true(xml_has_attr(x, "id"))
expect_false(xml_has_attr(x, "id2"))
})
test_that("returning an attribute node prints properly", {
x <- read_xml("")
t1 <- xml_find_first(x, "//@c")
expect_equal(format(t1), "")
})
# Namespaces -------------------------------------------------------------------
# Default namespace doesn't apply to attributes
test_that("qualified names returned when ns given", {
x <- read_xml(test_path("ns-multiple.xml"))
ns <- xml_ns(x)
bars <- xml_children(xml_children(x))
attr <- xml_attrs(bars, ns)
expect_named(attr[[1]], "f:id")
expect_named(attr[[2]], "g:id")
})
x <- read_xml('
')
doc <- xml_children(x)[[1]]
docs <- xml_find_all(x, "//doc")
ns <- xml_ns(x)
test_that("qualified attributes get own values", {
expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "f:id" = "f", "id" = ""))
})
test_that("unqualified name gets unnamespace attribute", {
expect_equal(xml_attr(doc, "id", ns), "")
})
test_that("namespace names gets namespaced attribute", {
expect_equal(xml_attr(doc, "b:id", ns), "b")
expect_equal(xml_attr(doc, "f:id", ns), "f")
})
test_that("xml_attr<- modifies properties", {
xml_attr(doc, "id", ns) <- "test"
expect_equal(xml_attr(doc, "id", ns), "test")
xml_attr(doc, "b:id", ns) <- "b_test"
expect_equal(xml_attr(doc, "b:id", ns), "b_test")
xml_attr(doc, "f:id", ns) <- "f_test"
expect_equal(xml_attr(doc, "f:id", ns), "f_test")
xml_attr(docs, "f:id", ns) <- "f_test2"
expect_equal(xml_attr(docs, "f:id", ns), c("f_test2", "f_test2"))
xml_attr(docs, "f:id", ns) <- NULL
expect_equal(xml_attr(docs, "f:id", ns), c(NA_character_, NA_character_))
})
test_that("xml_attr<- recycles values", {
x <- read_xml("")
a <- xml_find_all(x, "a")
xml_attr(a, "b") <- c("e", "f")
expect_equal(xml_attr(a, "b"), c("e", "f"))
})
test_that("xml_attrs<- modifies all attributes", {
expect_error(xml_attrs(doc) <- 1, "`value` must be a named character vector or `NULL`")
expect_error(xml_attrs(doc) <- "test", "`value` must be a named character vector or `NULL`")
xml_attrs(doc, ns) <- c("b:id" = "b", "f:id" = "f", "id" = "test")
expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "id" = "test", "f:id" = "f"))
xml_attrs(doc, ns) <- c("b:id" = "b", "f:id" = "f")
expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "f:id" = "f"))
xml_attrs(doc, ns) <- c("b:id" = "b", "id" = "test")
expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "id" = "test"))
expect_snapshot(error = TRUE, xml_attrs(docs) <- "test")
xml_attrs(docs, ns) <- c("b:id" = "b", "id" = "test")
expect_equal(
xml_attrs(docs, ns),
list(
c("b:id" = "b", "id" = "test"),
c("b:id" = "b", "id" = "test")
)
)
xml_attrs(docs, ns) <- NULL
expect_equal(xml_attrs(docs, ns), list(setNames(character(0), character()), setNames(character(0), character())))
})
test_that("xml_attr<- accepts non-character values", {
x <- read_xml("")
svg <- xml_root(x)
xml_attr(svg, "width") <- 8L
expect_equal(xml_attr(svg, "width"), "8")
xml_attr(svg, "height") <- 12.5
expect_equal(xml_attr(svg, "height"), "12.5")
expect_equal(xml_attrs(svg), c(width = "8", height = "12.5"))
xml_attrs(svg) <- c(width = 14L, height = 23.45)
expect_equal(xml_attrs(svg), c(width = "14", height = "23.45"))
})
test_that("xml_attr<- can set empty strings, and removes attributes with NULL", {
x <- read_xml("")
xml_attr(x, "test") <- ""
expect_equal(xml_attr(x, "test"), "")
xml_attr(x, "test") <- NULL
expect_equal(xml_attr(x, "test"), NA_character_)
})
test_that("xml_attr<- removes namespaces if desired", {
xml_attr(x, "xmlns:b") <- NULL
expect_equal(xml_attrs(x), c("xmlns:f" = "http://foo.com"))
})
test_that("xml_attr<- removes namespaces if desired", {
x <- read_xml("")
# cannot find //b with a default namespace
expect_length(xml_find_all(x, "//b"), 0)
# unless we specify it explicitly
expect_length(xml_find_all(x, "//b"), 0)
expect_length(xml_find_all(x, "//d1:b", xml_ns(x)), 1)
# but can find it once we remove the namespace
xml_attr(x, "xmlns") <- NULL
expect_length(xml_find_all(x, "//b"), 1)
# and add the old namespace back
xml_attr(x, "xmlns") <- "tag:foo"
expect_equal(xml_attr(x, "xmlns"), "tag:foo")
expect_length(xml_find_all(x, "//b"), 0)
expect_length(xml_find_all(x, "//d1:b", xml_ns(x)), 1)
expect_equal(xml_attr(x, "xmlns"), "tag:foo")
})
test_that("xml_attr<- removes prefixed namespaces if desired", {
x <- read_xml("")
# cannot find //b with a prefixed namespace
expect_length(xml_find_all(x, "//b"), 0)
# unless we specify it explicitly
expect_length(xml_find_all(x, "//b"), 0)
expect_length(xml_find_all(x, "//pre:b", xml_ns(x)), 1)
# but can find it once we remove the namespace
xml_attr(x, "xmlns:pre") <- NULL
expect_length(xml_find_all(x, "//b"), 1)
# and add the old namespace back
xml_attr(x, "xmlns:pre") <- "tag:foo"
xml_set_namespace(xml_children(x)[[1]], "pre")
expect_equal(xml_attr(x, "xmlns:pre"), "tag:foo")
expect_length(xml_find_all(x, "//b"), 0)
expect_length(xml_find_all(x, "//pre:b", xml_ns(x)), 1)
expect_equal(xml_attr(x, "xmlns:pre"), "tag:foo")
})
test_that("xml_set_attr works identically to xml_attr<-", {
content <- ""
x <- read_xml(content)
y <- read_xml(content)
xml_attr(x, "a") <- "test"
xml_set_attr(y, "a", "test")
expect_equal(as.character(x), as.character(y))
bx <- xml_find_all(x, "//b")
by <- xml_find_all(y, "//b")
xml_attr(bx, "b") <- "test2"
xml_set_attr(by, "b", "test2")
expect_equal(as.character(x), as.character(y))
# No errors for xml_missing
mss <- xml_find_first(bx, "./c")
expect_no_error(xml_attr(mss[[2]], "b") <- "blah")
expect_no_error(xml_set_attr(mss[[2]], "b", "blah"))
})
test_that("xml_set_attrs works identically to xml_attrs<-", {
content <- ""
x <- read_xml(content)
y <- read_xml(content)
xml_attrs(x) <- c(a = "test")
xml_set_attrs(y, c(a = "test"))
expect_equal(as.character(x), as.character(y))
bx <- xml_find_all(x, "//b")
by <- xml_find_all(y, "//b")
xml_attrs(bx) <- c(b = "test2")
xml_set_attrs(by, c(b = "test2"))
expect_equal(as.character(x), as.character(y))
# No errors for xml_missing
mss <- xml_find_first(bx, "./c")
expect_no_error(xml_attrs(mss[[2]]) <- c("b" = "blah"))
expect_no_error(xml_set_attrs(mss[[2]], c("b" = "blah")))
})
test_that("xml_set_attr can set the same namespace multiple times", {
doc <- xml_new_root("foo")
xml_set_attr(doc, "xmlns:bar", "http://a/namespace")
xml_set_attr(doc, "xmlns:bar", "http://b/namespace")
expect_equal(xml_attr(doc, "xmlns:bar"), "http://b/namespace")
})
xml2/tests/testthat/records.dtd 0000644 0001762 0000144 00000000242 14727652467 016316 0 ustar ligges users
xml2/tests/testthat/test-xml_text.R 0000644 0001762 0000144 00000004014 14727652467 017125 0 ustar ligges users test_that("xml_text returns only text without markup", {
x <- read_xml("
This is some text. This is bold!
")
expect_identical(xml_text(x), "This is some text. This is bold!")
expect_identical(xml_text(xml_children(x)), "bold!")
})
test_that("xml_text works properly with xml_nodeset objects", {
x <- read_xml("
Some text.
Some other.
No bold text
")
children <- xml_children(x)
x <- xml_find_first(children, ".//b|.//i")
expect_identical(
xml_text(x),
c("text", "other", NA)
)
})
test_that("xml_text<- and xml_set_text work properly with xml_nodeset objects", {
x <- read_xml("This is some text. This is some nested text.")
expect_identical(xml_text(x), "This is some text. This is some nested text.")
xml_text(x) <- "test"
expect_identical(xml_text(x), "testThis is some nested text.")
xml_set_text(x, "test2")
expect_identical(xml_text(x), "test2This is some nested text.")
})
test_that("xml_text trims whitespace if requested, including non-breaking spaces", {
x <- read_html("
Some text €
")
expect_identical(
xml_text(x),
" Some text \u20ac \u00a0"
)
expect_identical(
xml_text(x, trim = TRUE),
"Some text \u20ac"
)
x2 <- read_html("
Some text €
and more € text ")
expect_identical(
xml_text(xml_find_all(x2, ".//p"), trim = TRUE),
c("Some text \u20ac", "and more \u20ac text")
)
})
test_that("xml_integer() returns an integer vector", {
x <- read_xml("")
expect_identical(
xml_integer(xml_find_all(x, "//@x")),
c(1L, 2L)
)
})
test_that("xml_double() returns a numeric vector", {
x <- read_xml("")
expect_identical(xml_double(xml_find_all(x, "//@latitude")), c(42.3466456, -36.8523378))
})
xml2/tests/testthat/test-xml_nodeset.R 0000644 0001762 0000144 00000005320 14727652467 017603 0 ustar ligges users test_that("methods work on empty nodesets", {
x <- read_xml("")
empty <- xml_find_all(x, "//c")
expect_error(empty[[1]], "subscript out of bounds")
expect_identical(empty[1], empty)
test <- empty
xml_attr(test, "test") <- 1
expect_identical(test, empty)
xml_attrs(test) <- c("test" = 1)
expect_identical(test, empty)
xml_name(test) <- "test"
expect_identical(test, empty)
xml_text(test) <- "test"
expect_identical(test, empty)
expect_identical(as.character(empty), character(0))
expect_identical(as_list(empty), list())
expect_identical(nodeset_apply(empty, identical), empty)
expect_output(print(empty), "\\{xml_nodeset \\(0\\)\\}")
expect_silent(tree_structure(empty))
xml_add_child(test, "test")
expect_identical(test, empty)
xml_add_sibling(test, "test")
expect_identical(test, empty)
expect_identical(xml_attr(empty, "test"), character())
expect_identical(xml_attrs(empty), list())
expect_identical(xml_double(empty), numeric())
expect_identical(xml_find_all(empty), empty)
expect_identical(xml_find_chr(empty), character())
expect_identical(xml_find_first(empty), empty)
expect_identical(xml_find_lgl(empty), logical())
expect_identical(xml_find_num(empty), numeric())
expect_identical(xml_integer(empty), integer())
expect_identical(xml_length(empty), 0L)
expect_identical(xml_name(empty), character())
expect_identical(xml_ns(empty), character())
expect_identical(xml_parent(empty), empty)
expect_identical(xml_path(empty), character())
xml_remove(test)
expect_identical(test, empty)
xml_replace(test)
expect_identical(test, empty)
xml_set_attr(test, "test", 1)
expect_identical(test, empty)
xml_set_attrs(test, c("test" = 1))
expect_identical(test, empty)
xml_set_name(test, "test")
expect_identical(test, empty)
xml_set_text(test, "test")
expect_identical(test, empty)
expect_identical(xml_siblings(empty), empty)
expect_silent(xml_structure(empty))
expect_identical(xml_text(empty), character())
expect_identical(xml_url(empty), character())
})
test_that("print method is correct", {
skip_if(getOption("width") < 20L, "Screen too narrow")
x <- read_html(test_path("lego.html.bz2"))
body <- xml_find_first(x, "//body")
divs <- xml_find_all(body, ".//div")[1:10]
expect_snapshot(print(divs))
# double-substring() logic
s <- c(
"123456789\\", # always too wide, '\' never encoded
"12345", # always fits
"12\\45" # doesn't fit when '\' is encoded
)
# embed as text on nodes ,,
s <- sprintf("<%1$s>%2$s%1$s>", letters[1:3], s)
x <- read_xml(sprintf("%s", paste(s, collapse="")))
expect_snapshot({
print(x, width = 13L)
print(x, width = 14L)
})
})
xml2/tests/testthat/test-xml_missing.R 0000644 0001762 0000144 00000002766 14727652467 017626 0 ustar ligges users x <- read_xml("
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
xml2/tests/testthat/_snaps/xml_attr.md 0000644 0001762 0000144 00000000267 14760705217 017613 0 ustar ligges users # xml_attrs<- modifies all attributes
Code
xml_attrs(docs) <- "test"
Condition
Error in `xml_attrs<-`:
! `test` must be a list of named character vectors.
xml2/tests/testthat/_snaps/xml_find.md 0000644 0001762 0000144 00000005043 14760705220 017550 0 ustar ligges users # xml_find_num errors with non numeric results
Code
xml_find_num(x, "//z")
Condition
Error in `xml_find_num()`:
! Element at path `//z` must be a number, not a object.
Code
xml_find_num(x, "//y")
Condition
Error in `xml_find_num()`:
! Element at path `//y` must be a number, not a list.
Code
xml_find_num(x, "1=1")
Condition
Error in `xml_find_num()`:
! Element at path `1=1` must be a number, not `TRUE`.
Code
xml_find_num(x, "string(5)")
Condition
Error in `xml_find_num()`:
! Element at path `string(5)` must be a number, not the string "5".
# xml_find_int errors with non integer results
Code
xml_find_int(x, "//z")
Condition
Error in `xml_find_int()`:
! Element at path `//z` must be a whole number, not a object.
Code
xml_find_int(x, "//y")
Condition
Error in `xml_find_int()`:
! Element at path `//y` must be a whole number, not a list.
Code
xml_find_int(x, "number(1.1)")
Condition
Error in `xml_find_int()`:
! Element at path `number(1.1)` must be a whole number, not the number 1.1.
# xml_find_chr errors with non character results
Code
xml_find_chr(x, "//z")
Condition
Error in `xml_find_chr()`:
! Element at path `//z` must be a single string, not a object.
Code
xml_find_chr(x, "//y")
Condition
Error in `xml_find_chr()`:
! Element at path `//y` must be a single string, not a list.
Code
xml_find_chr(x, "1=1")
Condition
Error in `xml_find_chr()`:
! Element at path `1=1` must be a single string, not `TRUE`.
Code
xml_find_chr(x, "1+1")
Condition
Error in `xml_find_chr()`:
! Element at path `1+1` must be a single string, not the number 2.
# xml_find_lgl errors with non logical results
Code
xml_find_lgl(x, "//z")
Condition
Error in `xml_find_lgl()`:
! Element at path `//z` must be `TRUE` or `FALSE`, not a object.
Code
xml_find_lgl(x, "//y")
Condition
Error in `xml_find_lgl()`:
! Element at path `//y` must be `TRUE` or `FALSE`, not a list.
Code
xml_find_lgl(x, "string(5)")
Condition
Error in `xml_find_lgl()`:
! Element at path `string(5)` must be `TRUE` or `FALSE`, not the string "5".
Code
xml_find_lgl(x, "1+1")
Condition
Error in `xml_find_lgl()`:
! Element at path `1+1` must be `TRUE` or `FALSE`, not the number 2.
xml2/tests/testthat/_snaps/xml_nodeset.md 0000644 0001762 0000144 00000002246 14760705220 020273 0 ustar ligges users # print method is correct
Code
print(divs)
Output
{xml_nodeset (10)}
[1]
\n
\n\n
Node Modification
Jim Hester
2025-03-14
Modifying Existing XML
Modifying existing XML can be done in xml2 by using the replacement
functions of the accessors. They all have methods for both individual
xml_node objects as well as xml_nodeset
objects. If a vector of values is provided it is applied piecewise over
the nodeset, otherwise the value is recycled.
Text Modification
Text modification only happens on text nodes. If a given node has
more than one text node only the first will be affected. If you want to
modify additional text nodes you need to select them explicitly with
/text().
x <-read_xml("<p>This is some <b>text</b>. This is more.</p>")xml_text(x)#> [1] "This is some text. This is more."xml_text(x) <-"This is some other text."xml_text(x)#> [1] "This is some other text.text. This is more."# You can avoid this by explicitly selecting the text node.x <-read_xml("<p>This is some text. This is <b>bold!</b></p>")text_only <-xml_find_all(x, "//text()")xml_text(text_only) <-c("This is some other text. ", "Still bold!")xml_text(x)#> [1] "This is some other text. Still bold!"xml_structure(x)#> <p>#> {text}#> <b>#> {text}
Attribute and Namespace Definition Modification
Attributes and namespace definitions are modified one at a time with
xml_attr() or all at once with xml_attrs(). In
both cases using NULL as the value will remove the
attribute completely.
x <-read_xml("<a href='invalid!'>xml2</a>")xml_attr(x, "href")#> [1] "invalid!"xml_attr(x, "href") <-"https://github.com/r-lib/xml2"xml_attr(x, "href")#> [1] "https://github.com/r-lib/xml2"xml_attrs(x) <-c(id ="xml2", href ="https://github.com/r-lib/xml2")xml_attrs(x)#> href id #> "https://github.com/r-lib/xml2" "xml2"x#> {xml_document}#> <a href="https://github.com/r-lib/xml2" id="xml2">xml_attrs(x) <-NULLx#> {xml_document}#> <a># Namespaces are added with as a xmlns or xmlns:prefix attributexml_attr(x, "xmlns") <-"http://foo"x#> {xml_document}#> <a xmlns="http://foo">xml_attr(x, "xmlns:bar") <-"http://bar"x#> {xml_document}#> <a xmlns="http://foo" xmlns:bar="http://bar">
All of these functions have a .copy argument. If this is
set to FALSE they will remove the new node from its
location before inserting it into the new location. Otherwise they make
a copy of the node before insertion.
The xml_remove() can be used to remove a node (and its
children) from a tree. The default behavior is to unlink the node from
the tree, but does not free the memory for the node, so R
objects pointing to the node are still valid.
This allows code like the following to work without crashing R
x <-read_xml("<foo><bar><baz/></bar></foo>")x1 <- x %>%xml_children() %>% .[[1]]x2 <- x1 %>%xml_children() %>% .[[1]]xml_remove(x1)rm(x1)gc()#> used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)#> Ncells 631023 33.8 1423872 76.1 NA 1267757 67.8#> Vcells 1187491 9.1 8388608 64.0 16384 2646979 20.2x2#> {xml_node}#> <baz>
If you are not planning on referencing these nodes again this memory
is wasted. Calling xml_remove(free = TRUE) will remove the
nodes and free the memory used to store them.
Note In this case any node which previously
pointed to the node or its children will instead be pointing to free
memory and may cause R to crash. xml2 can’t figure this out for you, so
it’s your responsibility to remove any objects which are no longer
valid.
In particular xml_find_*() results are easy to overlook,
for example
x <-read_xml("<a><b /><b><b /></b></a>")bees <-xml_find_all(x, "//b")xml_remove(xml_child(x), free =TRUE)# bees[[1]] is no longer valid!!!rm(bees)gc()#> used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)#> Ncells 631067 33.8 1423872 76.1 NA 1267757 67.8#> Vcells 1187692 9.1 8388608 64.0 16384 2646979 20.2