Node Modification

Jim Hester

2016-05-20

Modifying Existing XML

Modifying existing XML can be done in xml2 by using the replacement functions of the accessors. They all have methods for both individual xml_node objects and xml_nodeset objects. If a vector of values is provided, it is applied piecewise over the nodeset; otherwise the single value is recycled.

Text Modification

Text modification only happens on text nodes. If a given node has more than one text node, only the first will be affected. If you want to modify additional text nodes, you need to select them explicitly with an XPath text() step (for example //text()).

library(xml2)
# <p> contains two text nodes of its own, one on each side of the <b> element.
x <- read_xml("<p>This is some <b>text</b>. This is more.</p>")
xml_text(x)
## [1] "This is some text. This is more."
# Assigning to the node as a whole only replaces its first text node; the
# warning and the leftover "text. This is more." below show the rest is kept.
xml_text(x) <- "This is some other text."
## Warning in xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns,
## num_results = 1): 2 results found, but only returning first 1
xml_text(x)
## [1] "This is some other text.text. This is more."
# You can avoid this by explicitly selecting the text nodes.
x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
# "//text()" selects every text node: the one directly in <p> and the one in <b>.
text_only <- xml_find_all(x, "//text()")

# One replacement value per selected text node.
xml_text(text_only) <- c("This is some other text. ", "Still bold!")
xml_text(x)
## [1] "This is some other text. Still bold!"
xml_structure(x)
## <p>
##   {text}
##   <b>
##     {text}

Attribute Modification

Attributes are modified one at a time with xml_attr() or all at once with xml_attrs(). In both cases using NULL as the value will remove the attribute completely.

x <- read_xml("<a href='invalid!'>xml2</a>")
xml_attr(x, "href")
## [1] "invalid!"
# Replace the value of a single attribute.
xml_attr(x, "href") <- "https://github.com/hadley/xml2"
xml_attr(x, "href")
## [1] "https://github.com/hadley/xml2"
# Set all attributes at once from a named character vector.
xml_attrs(x) <- c(id = "xml2", href = "https://github.com/hadley/xml2")
xml_attrs(x)
##                             href                               id 
## "https://github.com/hadley/xml2"                           "xml2"
cat(as.character(x))
## <?xml version="1.0" encoding="UTF-8"?>
## <a href="https://github.com/hadley/xml2" id="xml2">xml2</a>
# Assigning NULL removes every attribute from the node.
xml_attrs(x) <- NULL
cat(as.character(x))
## <?xml version="1.0" encoding="UTF-8"?>
## <a>xml2</a>

Name Modification

Node names are modified with xml_name().

x <- read_xml("<a><b/></a>")
x
## {xml_document}
## <a>
## [1] <b/>
xml_name(x)
## [1] "a"
# Rename the root element from <a> to <c>; its child <b/> is left untouched.
xml_name(x) <- "c"
x
## {xml_document}
## <c>
## [1] <b/>

Node Modification

All of these functions have a .copy argument. If this is set to FALSE, the node being inserted is removed from its current location before it is placed at the new location (i.e. it is moved). Otherwise a copy of the node is made before insertion, and the original stays where it was.

Replacing existing nodes

x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
# t1 and t2 are the two top-level <child> nodes; t3 is the <child>3</child>
# nested inside t2.
children <- xml_children(x)
t1 <- children[[1]]
t2 <- children[[2]]
t3 <- xml_children(children[[2]])[[1]]

# Put t3 in place of t1. The output below shows the nested <child>3</child>
# is still present inside the second child, i.e. t3 was copied, not moved.
xml_replace(t1, t3)
## {xml_node}
## <child>
x
## {xml_document}
## <parent>
## [1] <child>3</child>
## [2] <child>2<child>3</child></child>

Append sibling

x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
# t1 and t2 are the two top-level <child> nodes; t3 is the <child>3</child>
# nested inside t2.
children <- xml_children(x)
t1 <- children[[1]]
t2 <- children[[2]]
t3 <- xml_children(children[[2]])[[1]]

# Insert t3 as a sibling of t1; the output shows it lands directly after t1.
xml_add_sibling(t1, t3)
## {xml_node}
## <child>
x
## {xml_document}
## <parent>
## [1] <child>1</child>
## [2] <child>3</child>
## [3] <child>2<child>3</child></child>
# Insert a copy of t1 immediately before t3 (inside the second <child>).
# The position argument is spelled `.where`, with a leading dot.
xml_add_sibling(t3, t1, .where = "before")
## {xml_node}
## <child>
x
## {xml_document}
## <parent>
## [1] <child>1</child>
## [2] <child>3</child>
## [3] <child>2<child>1</child><child>3</child></child>

Add a child

x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
# t1 and t2 are the two top-level <child> nodes; t3 is the <child>3</child>
# nested inside t2.
children <- xml_children(x)
t1 <- children[[1]]
t2 <- children[[2]]
t3 <- xml_children(children[[2]])[[1]]

# Add t3 as a child of t1; the output shows it appended after t1's text.
xml_add_child(t1, t3)
## {xml_node}
## <child>
x
## {xml_document}
## <parent>
## [1] <child>1<child>3</child></child>
## [2] <child>2<child>3</child></child>
# A freshly parsed document can be added as a child as well.
xml_add_child(t1, read_xml("<test/>"))
## {xml_document}
## <test>
x
## {xml_document}
## <parent>
## [1] <child>1<child>3</child><test/></child>
## [2] <child>2<child>3</child></child>

Namespaces

We want to construct a document with the following namespace layout (from http://stackoverflow.com/questions/32939229/creating-xml-in-r-with-namespaces/32941524#32941524).

<?xml version = "1.0" encoding="UTF-8"?>
<sld xmlns="http://www.o.net/sld"
     xmlns:ogc="http://www.o.net/ogc"
     xmlns:se="http://www.o.net/se"
     version="1.1.0" >
<layer>
<se:Name>My Layer</se:Name>
</layer>
</sld>
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following objects are masked from 'package:testthat':
## 
##     equals, is_less_than, not
# Build the document top-down; each xml_add_child() call in the chain nests a
# new element inside the one created by the previous call.
# Named arguments become attributes / namespace declarations on the new node,
# and the unnamed "My Layer" becomes the text of <se:Name>, matching the
# target layout. xml_root() then steps back up so the whole document prints.
d <- xml_new_document() %>%
  xml_add_child("sld",
    xmlns = "http://www.o.net/sld",
    "xmlns:ogc" = "http://www.o.net/ogc",
    "xmlns:se" = "http://www.o.net/se",
    version = "1.1.0") %>%
  xml_add_child("layer") %>%
  xml_add_child("se:Name", "My Layer") %>%
  xml_root()

cat(as.character(d))
## <?xml version="1.0"?>
## <sld xmlns="http://www.o.net/sld" xmlns:ogc="http://www.o.net/ogc" xmlns:se="http://www.o.net/se" version="1.1.0"><layer><se:Name>My Layer</se:Name></layer></sld>