Search code examples
xslt, xslt-2.0, saxon, xslt-grouping

XSLT grouping and leaving out duplicates


I have the following XML input file:

<mappings>
    <mapping>
        <key>6718</key>
        <value attribute="content_type">Info Page</value>
    </mapping>
    <mapping>
        <key>35905</key>
        <value attribute="content_type">Press releases</value>
    </mapping>
    <mapping>
        <key>6718</key>
        <value attribute="content_type">Info Page</value>
    </mapping>
    <mapping>
        <key>36941</key>
        <value attribute="content_type">Press releases</value>
    </mapping>
    <mapping>
        <key>24920</key>
        <value attribute="content_type">Press releases</value>
    </mapping>
    <mapping>
        <key>40244</key>
        <value attribute="content_type">Press releases</value>
    </mapping>
    <mapping>
        <key>36639</key>
        <value attribute="content_type">Press releases</value>
    </mapping>
    <mapping>
        <key>1861</key>
        <value attribute="content_type">Press releases</value>
    </mapping>
    <mapping>
        <key>2280</key>
        <value attribute="content_type">Press releases</value>
    </mapping>
    <mapping>
        <key>42062</key>
        <value attribute="content_type">Press releases</value>
    </mapping>
</mappings>

I would like to produce the following XML:

<reductions>
    <reduction>
        <group>Info Page</group>
        <key>6718</key>
    </reduction>
    <reduction>
        <group>Press releases</group>
        <key>35905</key>
        <key>36941</key>
        <key>24920</key>
        <key>40244</key>
        <key>36639</key>
        <key>1861</key>
        <key>2280</key>
        <key>42062</key>
    </reduction>
</reductions>

So the keys from the input XML need to be grouped in the output XML based on the content of the value element from the input XML. Duplicates also need to be removed: the input XML contains the key 6718 twice, but the output XML should contain it only once.

I'm using XSLT 2.0 with Saxon 9 HE.

Can someone please help me?

[EDIT]

<?xml version="1.0" encoding="UTF-8"?>
<xsl:transform 
    version="2.0" 
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="xs">

    <!-- Entry point: first builds an intermediate <mappings> tree in a variable,
         then transforms that tree into the <reductions> result. -->
    <xsl:template match="/">
        <!-- Temporary tree holding one <mapping> per selected R element
             (assumes the source document element is <resource> with R children;
             the source document is not shown here). -->
        <xsl:variable name="mappings">
            <mappings>
                <xsl:apply-templates select="resource/R"/>
            </mappings>
        </xsl:variable>
        <reductions>
            <!-- $mappings/* selects the <mappings> wrapper element. No template in this
                 stylesheet matches it, so the built-in rule descends to its <mapping>
                 children, which are then handled one at a time by match="mapping". -->
            <xsl:apply-templates select="$mappings/*"/>
        </reductions>
    </xsl:template>

    <!-- Emits one <mapping> for each R element: <key> carries the V attribute of the
         MT child whose N is 'content_id', and <value attribute="content_type"> carries
         the V attribute of the MT child whose N is 'content_type'. -->
    <xsl:template match="R">
        <mapping>
            <key><xsl:value-of select="MT[@N='content_id']/@V"/></key>
            <value>
                <xsl:attribute name="attribute">content_type</xsl:attribute>
                <xsl:value-of select="MT[@N='content_type']/@V"/>
            </value>
        </mapping>
        <!-- The block below is disabled; if enabled it would emit a second <mapping>
             for the same content_id whose value is the formatted modified_timestamp. -->
        <!--mapping>
            <key><xsl:value-of select="MT[@N='content_id']/@V"/></key>
            <value>
                <xsl:attribute name="attribute">modified_timestamp</xsl:attribute>
                <xsl:value-of select="format-dateTime(MT[@N='modified_timestamp']/@V, '[Y0001]-[M01]-[D01]')"/>
            </value>
        </mapping-->
    </xsl:template>

    <!-- NOTE(review): this template runs once per <mapping>, and select="." makes the
         grouping population that single element. Every group therefore contains
         exactly one mapping, so each <mapping> yields its own <reduction> and
         duplicates are never merged. Grouping across mappings requires running
         xsl:for-each-group from the parent element with select="mapping". -->
    <xsl:template match="mapping">
        <xsl:for-each-group select="." group-by="value">
            <reduction>
                <group><xsl:value-of select="current-grouping-key()"/></group>
                <!-- Groups the <key> children of the current group by their own value and
                     copies the first <key> of each group. -->
                <xsl:for-each-group select="current-group()/key" group-by=".">
                    <xsl:copy-of select="."/>
                </xsl:for-each-group>
            </reduction>
        </xsl:for-each-group>
    </xsl:template>

</xsl:transform>

Solution

  • Here is a stylesheet using for-each-group twice:

    <xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    
    <xsl:output indent="yes"/>
    
    <!-- Builds <reductions> from a <mappings> element: one <reduction> per distinct
         <value>, listing each distinct <key> of that group once. -->
    <xsl:template match="mappings">
      <reductions>
        <!-- Outer grouping: all child <mapping> elements, keyed by the string value of
             their <value> child. Groups are processed in order of first appearance. -->
        <xsl:for-each-group select="mapping" group-by="value">
          <reduction>
            <group><xsl:value-of select="current-grouping-key()"/></group>
            <!-- Inner grouping: the <key> elements of the current group, keyed by their
                 own value. Inside, "." is the first <key> of each group, so copying it
                 outputs every distinct key exactly once. -->
            <xsl:for-each-group select="current-group()/key" group-by=".">
              <xsl:copy-of select="."/>
            </xsl:for-each-group>
          </reduction>
        </xsl:for-each-group>
      </reductions>
    </xsl:template>
    
    </xsl:stylesheet>
    

    [edit] When I run the posted stylesheet with Saxon 9.4 HE against the input

    <mappings>
        <mapping>
            <key>6718</key>
            <value attribute="content_type">Info Page</value>
        </mapping>
        <mapping>
            <key>35905</key>
            <value attribute="content_type">Press releases</value>
        </mapping>
        <mapping>
            <key>6718</key>
            <value attribute="content_type">Info Page</value>
        </mapping>
        <mapping>
            <key>36941</key>
            <value attribute="content_type">Press releases</value>
        </mapping>
        <mapping>
            <key>24920</key>
            <value attribute="content_type">Press releases</value>
        </mapping>
        <mapping>
            <key>40244</key>
            <value attribute="content_type">Press releases</value>
        </mapping>
        <mapping>
            <key>36639</key>
            <value attribute="content_type">Press releases</value>
        </mapping>
        <mapping>
            <key>1861</key>
            <value attribute="content_type">Press releases</value>
        </mapping>
        <mapping>
            <key>2280</key>
            <value attribute="content_type">Press releases</value>
        </mapping>
        <mapping>
            <key>42062</key>
            <value attribute="content_type">Press releases</value>
        </mapping>
    </mappings>
    

    I get the result

    <reductions>
       <reduction>
          <group>Info Page</group>
          <key>6718</key>
       </reduction>
       <reduction>
          <group>Press releases</group>
          <key>35905</key>
          <key>36941</key>
          <key>24920</key>
          <key>40244</key>
          <key>36639</key>
          <key>1861</key>
          <key>2280</key>
          <key>42062</key>
       </reduction>
    </reductions>
    

    which is correct as far as I understand your requirements.