Search code examples
xml xslt xml-parsing stx

Matching two different blocks in XML using STX parser


I have a large XML file in which I am trying to match two blocks by id, using Joost, the Java implementation of STX. This is my XML file:

<?xml version='1.0' encoding='UTF-8'?>
<!-- Sample input: two sections whose entries correspond to each other by id. -->
<main>
    <!-- Lookup section: each description gives the size for one id. -->
    <table>
        <description id="1" size="big" />
        <description id="2" size="small" />
        <description id="3" size="medium" />
    </table>
    <!-- Data section: each item gives the color for one id. -->
    <products>
        <item id="1" color="red" />
        <item id="2" color="green" />
        <item id="3" color="blue" />
    </products>
</main>

And this is the output I'm trying to create:

<?xml version='1.0' encoding='UTF-8'?>
<!-- Desired result: every item keeps its id and color and gains the size
     of the description that has the same id. -->
<total>
    <item id="1" color="red" size="big" />
    <item id="2" color="green" size="small" />
    <item id="3" color="blue" size="medium" />
</total>

Using stx:buffer did not give any results, and I could not find a way to create key-value pairs similar to a Java HashMap. Is there any solution?


Solution

  • I am not familiar with STX, but with standard XSLT 3.0 streaming you can use an accumulator holding a map:

    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:map="http://www.w3.org/2005/xpath-functions/map"
        exclude-result-prefixes="#all"
        version="3.0">

      <!-- Streaming join: each products/item receives the size of the
           table/description that has the same id. The descriptions have to
           appear before the items that refer to them, because an item can
           only see what the accumulator has collected so far. -->

      <!-- Discard whitespace-only text nodes of the input. Without this the
           shallow-copy default would copy the indentation found inside main
           and products into the result, leaving stray blank lines around
           total and getting in the way of indent="yes". -->
      <xsl:strip-space elements="*"/>

      <!-- Map from description id to size, extended by one entry for every
           table/description element that is read. -->
      <xsl:accumulator name="description" as="map(xs:untypedAtomic, xs:string)" initial-value="map{}" streamable="yes">
        <xsl:accumulator-rule match="table/description" select="map:put($value, @id, string(@size))"/>
      </xsl:accumulator>

      <!-- Unmatched nodes are copied; the accumulator is made available to the
           streamed default mode. -->
      <xsl:mode on-no-match="shallow-copy" use-accumulators="description" streamable="yes"/>

      <xsl:output method="xml" indent="yes"/>

      <!-- The main wrapper itself is not copied; only its children are processed. -->
      <xsl:template match="main">
        <xsl:apply-templates/>
      </xsl:template>

      <!-- The table section produces no output; it only feeds the accumulator. -->
      <xsl:template match="table"/>

      <!-- The products wrapper is replaced by the total result element. -->
      <xsl:template match="products">
        <total>
          <xsl:apply-templates/>
        </total>
      </xsl:template>

      <!-- Copy the item with its own attributes, then add the size stored
           in the accumulated map under the item's id. -->
      <xsl:template match="item">
        <xsl:copy>
          <xsl:apply-templates select="@*"/>
          <xsl:attribute name="size" select="accumulator-before('description')(@id)"/>
        </xsl:copy>
      </xsl:template>

    </xsl:stylesheet>
    

    XSLT 3.0 with streaming currently seems to be supported only by Saxon EE (9.8 and later, i.e. 9.9, 10, 11, 12).

    I have now tried to construct an STX example; the following seems to work:

    <stx:transform xmlns:stx="http://stx.sourceforge.net/2002/ns" version="1.0"
                   pass-through="all" strip-space="yes">

      <!-- Joins each item with the table/description that has the same id.
           The descriptions have to be read before the items, because the
           lookup data is collected while the table section streams past. -->

      <!-- Two parallel sequences: every description appends its id to $id
           and its size to $size, so matching entries share one position. -->
      <stx:variable name="id"/>
      <stx:variable name="size"/>

      <!-- Record id and size of a description; nothing is written to the output. -->
      <stx:template match="table/description">
        <stx:assign name="id" select="($id, string(@id))"/>
        <stx:assign name="size" select="($size, string(@size))"/>
      </stx:template>

      <!-- Do not copy the root element or the table wrapper, but keep
           processing their children. table/description is intentionally
           left out of this pattern: it is handled by the template above,
           and listing it here as well would create two equal-priority
           candidates for the same element, leaving the choice to the
           processor's conflict resolution. -->
      <stx:template match="/* | table">
        <stx:process-children/>
      </stx:template>

      <!-- The products wrapper is replaced by the total result element. -->
      <stx:template match="products">
        <total>
          <stx:process-children/>
        </total>
      </stx:template>

      <!-- Copy the item with all of its attributes and add the size found
           at the position where the item's id occurs in $id. -->
      <stx:template match="item">
        <stx:copy attributes="@*">
          <stx:attribute name="size" select="item-at($size, index-of($id, @id))"/>
        </stx:copy>
      </stx:template>

    </stx:transform>
    

    Or using a Java HashMap:

    <stx:transform xmlns:stx="http://stx.sourceforge.net/2002/ns" version="1.0"
                   xmlns:hm="java:java.util.HashMap"
                   exclude-result-prefixes="hm"
                   pass-through="all" strip-space="no">

      <!-- Joins each item with the table/description that has the same id,
           using a java.util.HashMap reached through the hm extension
           namespace. The descriptions have to be read before the items. -->

      <!-- The lookup table: description id mapped to size. -->
      <stx:variable name="description" select="hm:new()"/>

      <!-- Store the size of a description under its id. The local variable
           exists only so that hm:put gets evaluated; its value (what put
           returns) is never used. The map is modified in place, so
           $description does not need to be reassigned. -->
      <stx:template match="table/description">
        <stx:variable name="oldValue" select="hm:put($description, string(@id), string(@size))"/>
      </stx:template>

      <!-- Do not copy the root element or the table wrapper, but keep
           processing their children. table/description is intentionally
           left out of this pattern: it is handled by the template above,
           and listing it here as well would create two equal-priority
           candidates for the same element, leaving the choice to the
           processor's conflict resolution. -->
      <stx:template match="/* | table">
        <stx:process-children/>
      </stx:template>

      <!-- The products wrapper is replaced by the total result element. -->
      <stx:template match="products">
        <total>
          <stx:process-children/>
        </total>
      </stx:template>

      <!-- Copy the item with all of its attributes and add the size stored
           in the map under the item's id. -->
      <stx:template match="item">
        <stx:copy attributes="@*">
          <stx:attribute name="size" select="hm:get($description, string(@id))"/>
        </stx:copy>
      </stx:template>

    </stx:transform>