xml2
xml2 copied to clipboard
.copy = FALSE argument in xml_add_child does not move the element
As the documentation of xml_add_child
suggests, setting .copy = FALSE
moves the element to its new location. However, it does not remove the node from its original location, leaving a zombied node that cannot be removed.
R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18362)
xml2_1.3.1
Case 1
# Create a simple document
doc <- xml2::as_xml_document(
"<document>
<body>
</body>
</document>"
)
body <- xml2::xml_find_all(doc, "//body")[[1]]
# Create an element named "a" in the document
a <- xml2::xml_add_child(doc, "a")
# Move it to body
a_moved <- xml2::xml_add_child(body, a, .copy = FALSE)
# We see it is copied by reference
a$node
#> <pointer: 0x0000000008a32e40>
a_moved$node
#> <pointer: 0x0000000008a32e40>
# We see it now lives in two places, but we find only one as there is only one
cat(as.character(doc))
#> <?xml version="1.0" encoding="UTF-8"?>
#> <document>
#> <body>
#> <a/></body>
#> <a/>
#> </document>
xml2::xml_find_all(doc, "//a")
#> {xml_nodeset (1)}
#> [1] <a/>
# This should remove 'both' of them
xml2::xml_remove(a)
# But only removes the "a" at the location we wanted to move it to
cat(as.character(doc))
#> <?xml version="1.0" encoding="UTF-8"?>
#> <document>
#> <body>
#> </body>
#> <a/>
#> </document>
xml2::xml_find_all(doc, "//a")
#> {xml_nodeset (1)}
#> [1] <a/>
# Removing it again
xml2::xml_remove(a)
# It is now zombied, because we cannot remove it.
cat(as.character(doc))
#> <?xml version="1.0" encoding="UTF-8"?>
#> <document>
#> <body>
#> </body>
#> <a/>
#> </document>
xml2::xml_find_all(doc, "//a")
#> {xml_nodeset (1)}
#> [1] <a/>
Case 2
Whenever namespaces are involved, xml_find_all
creates an error, I guess it produces an infinite loop somewhere.
# Create a simple document
doc <- xml2::as_xml_document(
"<document xmlns:z=\"https://example.com/\">
<z:body>
</z:body>
</document>"
)
body <- xml2::xml_find_all(doc, "//z:body")[[1]]
# Create an element named "a" in the document
a <- xml2::xml_add_child(doc, "z:a")
# Move it to body
a_moved <- xml2::xml_add_child(body, a, .copy = FALSE)
# We see it is copied by reference
a$node
#> <pointer: 0x00000000047eaea0>
a_moved$node
#> <pointer: 0x00000000047eaea0>
# We see it now lives in two places, but trying to find the first, or all, crashes
cat(as.character(doc))
#> <?xml version="1.0" encoding="UTF-8"?>
#> <document xmlns:z="https://example.com/">
#> <z:body>
#> <z:a/></z:body>
#> <z:a/>
#> </document>
xml2::xml_find_first(doc, "//z:a")
#> Error in xml_find_first.xml_node(doc, "//z:a"): Memory allocation failed : growing nodeset hit limit
#> [2]
xml2::xml_find_all(doc, "//z:a")
#> Error in xml_find_all.xml_node(doc, "//z:a"): Memory allocation failed : growing nodeset hit limit
#> [2]
I can reproduce the behavior you describe, but I don't really have time to look into if there is a obvious way to fix it. I would suggest not using .copy = FALSE
I came to this repo to report the same problem :sweat_smile:
xml <- xml2::read_xml(
"<document>
<a>blop</a><b>blip</b><b>paf</b>
</document>
"
)
xml
#> {xml_document}
#> <document>
#> [1] <a>blop</a>
#> [2] <b>blip</b>
#> [3] <b>paf</b>
a <- xml2::xml_find_first(xml, ".//a")
xml2::xml_add_parent(a, "parent")
parent <- xml2::xml_find_first(xml, ".//parent")
b <- xml2::xml_find_all(xml, ".//b")
xml2::xml_add_child(parent, b[[1]], .copy = FALSE)
# The node with 'blip' is in two places.
xml
#> {xml_document}
#> <document>
#> [1] <parent>\n <a>blop</a>\n <b>blip</b>\n <b>paf</b>\n</parent>
#> [2] <b>blip</b>
#> [3] <b>paf</b>
Created on 2022-05-19 by the reprex package (v2.0.1)