Consider the XML file Report.xml:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="abc">
<subchild>...</subchild>
</child>
<child value="xyz">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
I just want to split the above XML file as:
ReportSplit1.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="abc">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
ReportSplit2.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="xyz">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
That is, each split file should retain the parent node's attributes and all of the sibling nodes. The split should be made only on the children of the Statistics node.
I followed the workaround given in the link, changing the snippet as follows:
package xmlsplitting;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.*;
/**
 * Splits Report.xml into one file per {@code <child>} element found under
 * {@code /Report/Statistics}.
 *
 * <p>Only the cloned {@code <child>} subtree is handed to the transformer, so each
 * output file contains that subtree alone: the enclosing {@code <Report>} element,
 * its attributes and the sibling sections are not written.
 */
public class XmlSplit
{
    /**
     * Parses the report, selects every Statistics child with XPath and serializes
     * each one that has child nodes to {@code ReportSplit<n>.xml}, where {@code n}
     * is the 1-based position of the child in the selection.
     *
     * @param arg command-line arguments (unused)
     * @throws Exception if parsing, XPath evaluation, transformation or file I/O fails
     */
    static public void main(String[] arg) throws Exception
    {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse("D:\\Analyzer\\FileSplit\\Report.xml");

        // A transformer created without a stylesheet performs an identity copy.
        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();

        XPath xpath = XPathFactory.newInstance().newXPath();
        NodeList list = (NodeList) xpath.evaluate("//Report/Statistics/child", doc, XPathConstants.NODESET);

        // NodeList is 0-based: start at 0 so the first child is not skipped, and
        // number the files from 1 (ReportSplit1.xml holds the first child).
        for (int i = 0; i < list.getLength(); i++)
        {
            Node element = list.item(i).cloneNode(true);
            if (element.hasChildNodes())
            {
                Source src = new DOMSource(element);
                // try-with-resources closes the stream even when transform() throws.
                try (FileOutputStream fs = new FileOutputStream("D:\\Analyzer\\FileSplit\\ReportSplit" + (i + 1) + ".xml"))
                {
                    Result dest = new StreamResult(fs);
                    aTransformer.transform(src, dest);
                }
            }
        }
    }
}
The resulting split XML files are:
ReportSplit1.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<child value="abc">
<subchild>...</subchild>
</child>
ReportSplit2.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<child value="xyz">
<subchild>...</subchild>
</child>
Could anyone provide a workaround to achieve the desired split XML files?
Consider using XSLT, the declarative, special-purpose language for transforming XML documents, rather than XPath alone, since you need to transform the whole document. For your purposes, a dynamically generated XSLT stylesheet, embedded in Java and run in a loop over the values, can output multiple XML files:
XSLT script (shown standalone here with the value 'abc'; in the Java code below it is embedded as a string and the value is replaced on each iteration)
<!-- Copies the input document unchanged, except that every child element whose
     value attribute is not 'abc' is dropped. -->
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Serialize the result as indented XML 1.0 in UTF-8. -->
<xsl:output version="1.0" encoding="UTF-8" indent="yes" />
<!-- Discard whitespace-only text nodes of every input element. -->
<xsl:strip-space elements="*"/>
<!-- Identity transform: copy each attribute and node, then recurse into its
     attributes and children. -->
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
<!-- Empty template: a child element whose value attribute is not 'abc' (or that
     has no value attribute at all) produces no output, so it is removed.
     The pattern matches child elements at any depth, not only under Statistics. -->
<xsl:template match="child[not(@value='abc')]"/>
</xsl:transform>
Java code
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.OutputKeys;
import java.io.*;
import java.net.URISyntaxException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
 * Splits an XML document into one output file per value by running a generated
 * XSLT 1.0 stylesheet for each value.
 *
 * <p>Each stylesheet is an identity transform plus one empty template that drops
 * every {@code child} element whose {@code value} attribute differs from the
 * current value, so the rest of the document is copied unchanged.
 */
public class XmlSplit {

    /**
     * Parses the input document once, then transforms it once per value and writes
     * each result to {@code /path/to/output_<value>.xml}.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the input file cannot be read
     * @throws URISyntaxException declared for compatibility; not thrown by this code
     * @throws SAXException if the input is not well-formed XML
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws TransformerException if a stylesheet cannot be compiled or applied
     */
    public static void main(String[] args) throws IOException, URISyntaxException,
                                                  SAXException, ParserConfigurationException,
                                                  TransformerException {
        // Load XML Source
        String inputXML = "/path/to/XMLSource.xml";

        // Declare XML Values Array
        String[] xmlVals = {"abc", "xyz"};

        // The input document and the factories do not depend on the value being
        // processed, so build them once instead of once per iteration.
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(new File(inputXML));
        DOMSource source = new DOMSource(doc);
        TransformerFactory transformerFactory = TransformerFactory.newInstance();

        // Iterate through Values running dynamic, embedded XSLT
        for (String s : xmlVals) {
            String outputXML = "/path/to/output_" + s + ".xml";
            Source xslt = new StreamSource(new StringReader(buildStylesheet(s)));

            // XSLT Transformation with pretty print
            Transformer transformer = transformerFactory.newTransformer(xslt);
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
            transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

            // Output Result to File
            StreamResult result = new StreamResult(new File(outputXML));
            transformer.transform(source, result);
        }
    }

    /**
     * Builds the stylesheet text that keeps only {@code child} elements whose
     * {@code value} attribute equals {@code keepValue}.
     *
     * <p>The value is spliced into a single-quoted XPath string literal, so a value
     * containing an apostrophe (or XML markup characters) yields an invalid
     * stylesheet. XSLT 1.0 does not allow variable references in match patterns,
     * which is why the value is not passed as a stylesheet parameter instead.
     */
    private static String buildStylesheet(String keepValue) {
        return String.join("\n",
            "<xsl:transform xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" version=\"1.0\">",
            "<xsl:output version=\"1.0\" encoding=\"UTF-8\" indent=\"yes\" />",
            "<xsl:strip-space elements=\"*\"/>",
            "<xsl:template match=\"@*|node()\">",
            "<xsl:copy>",
            "<xsl:apply-templates select=\"@*|node()\"/>",
            "</xsl:copy>",
            "</xsl:template>",
            "<xsl:template match=\"child[not(@value='" + keepValue + "')]\"/>",
            "</xsl:transform>");
    }
}