I am trying to split a large XML file (around 10 GB) into smaller XML files using XSLT 3.0 streaming.
The XML looks like:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample input: a single Header followed by a long run of Entry elements. -->
<Book>
  <Header>...</Header>
  <Entry>...</Entry>
  <Entry>...</Entry>
  <Entry>...</Entry>
  <Entry>...</Entry>
</Book>
The XSL looks like:
<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
  Stylesheet as posted in the question. It declares a streamable default mode
  and tries to write the Header plus every Entry of Book to a result document.
  The processor rejects it with XTSE3430 (template rule is not streamable).
-->
<xsl:mode streamable="yes" on-no-match="shallow-copy"/>
<!-- Document-node rule: hands the Header and the name of the outermost element to the Book rule. -->
<xsl:template match="/">
<xsl:apply-templates select="Book">
<!-- NOTE(review): this appears to be the Book/Header operand named in the error: it selects
     streamed nodes a second time, in addition to the Book selection of xsl:apply-templates. -->
<xsl:with-param name="header" select="/Book/Header"/>
<xsl:with-param name="top-level-element" select="name(/*[1])"/>
</xsl:apply-templates>
</xsl:template>
<!-- Book rule: opens a result document whose file name is built from position(), then
     recreates the outermost element and fills it with the header and the Entry elements. -->
<xsl:template match="Book">
<xsl:param name="top-level-element"/>
<xsl:param name="header"/>
<xsl:result-document href="{concat(position(),'.xml')}" method="xml">
<xsl:element name="{$top-level-element}">
<!-- xsl:value-of writes the string value of $header, not a copy of the element.
     NOTE(review): xsl:copy-of may have been intended here; confirm. -->
<xsl:value-of select="$header"/>
<!-- Visits each Entry child in turn and dispatches it to the Entry rule below. -->
<xsl:iterate
select="Entry">
<xsl:apply-templates select="."/>
</xsl:iterate>
</xsl:element>
</xsl:result-document>
</xsl:template>
<!-- Entry rule: deep-copies the matched Entry to the output. -->
<xsl:template match="Entry">
<xsl:copy-of select="."/>
</xsl:template>
</xsl:stylesheet>
When I run the transformation against this XML, I get the following error:
Error on line 6 column 29
XTSE3430 Template rule is not streamable
* Operand {Book/Header} of {xsl:apply-templates} selects streamed nodes in a
context that allows arbitrary navigation (line 8)
* The result of the template rule can contain streamed nodes
Can someone help me understand what I'm doing wrong?
Here is an example that outputs a new file for each Entry element in the input document, copying the Header element that the accumulator has captured (note that saxon:capture is a Saxon extension attribute):
<xsl:stylesheet version="3.0"
                exclude-result-prefixes="#all"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:saxon="http://saxon.sf.net/">

  <!--
    Streaming split, one result document per Entry element.
    Each output file recreates the outermost element of the input (same name
    and namespace) and holds a copy of the captured Header followed by a copy
    of the Entry being processed.
  -->

  <xsl:output indent="yes"/>

  <!-- Streamed default mode: nodes without a matching rule are skipped, and
       every declared accumulator is applicable to the input. -->
  <xsl:mode streamable="yes" use-accumulators="#all" on-no-match="shallow-skip"/>

  <!-- Running number of Entry elements seen so far; used to name the output files. -->
  <xsl:accumulator name="Entry-count" streamable="yes" as="xs:integer" initial-value="0">
    <xsl:accumulator-rule match="Entry" select="$value + 1"/>
  </xsl:accumulator>

  <!-- Empty until the end of Header is reached, then holds the Header element.
       saxon:capture is a Saxon extension attribute on the accumulator rule. -->
  <xsl:accumulator name="header" streamable="yes" as="element(Header)?" initial-value="()">
    <xsl:accumulator-rule saxon:capture="yes" phase="end" match="Header" select="."/>
  </xsl:accumulator>

  <xsl:template match="Entry">
    <xsl:result-document href="Entry-{accumulator-before('Entry-count')}.xml">
      <!-- ancestor::*[last()] is the outermost element ancestor of this Entry. -->
      <xsl:element namespace="{namespace-uri(ancestor::*[last()])}"
                   name="{name(ancestor::*[last()])}">
        <xsl:copy-of select="accumulator-before('header')"/>
        <xsl:copy-of select="."/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

</xsl:stylesheet>
If you don't want to split on each Entry element, then, assuming you want to store a certain number of adjacent Entry elements in each result document, you can use positional grouping rather easily with streaming:
<xsl:stylesheet version="3.0"
                exclude-result-prefixes="#all"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:saxon="http://saxon.sf.net/">

  <!--
    Streaming split into chunks: consecutive Entry children of the outermost
    element are grouped chunk-size at a time, and every group is written to
    its own result document together with a copy of the captured Header.
  -->

  <xsl:output indent="yes"/>

  <!-- Streamed default mode: nodes without a matching rule are skipped, and
       every declared accumulator is applicable to the input. -->
  <xsl:mode streamable="yes" use-accumulators="#all" on-no-match="shallow-skip"/>

  <!-- Number of Entry elements per result document; defaults to 5. -->
  <xsl:param name="chunk-size" select="5" as="xs:integer"/>

  <!-- Empty until the end of Header is reached, then holds the Header element.
       saxon:capture is a Saxon extension attribute on the accumulator rule. -->
  <xsl:accumulator name="header" streamable="yes" as="element(Header)?" initial-value="()">
    <xsl:accumulator-rule saxon:capture="yes" phase="end" match="Header" select="."/>
  </xsl:accumulator>

  <xsl:template match="/*">
    <!-- The grouping key is the zero-based Entry position integer-divided by
         the chunk size, so adjacent entries with the same key form one chunk. -->
    <xsl:for-each-group group-adjacent="(position() - 1) idiv $chunk-size" select="Entry">
      <!-- Inside the group body position() is the number of the current group. -->
      <xsl:result-document href="chunk-{position()}.xml">
        <!-- ancestor::*[last()] is the outermost element ancestor of the context Entry. -->
        <xsl:element namespace="{namespace-uri(ancestor::*[last()])}"
                     name="{name(ancestor::*[last()])}">
          <xsl:copy-of select="accumulator-before('header')"/>
          <xsl:copy-of select="current-group()"/>
        </xsl:element>
      </xsl:result-document>
    </xsl:for-each-group>
  </xsl:template>

</xsl:stylesheet>