Search code examples
xmlxsltxslt-2.0xslt-grouping

Duplicate elements occuring while transforming XML in grouping elements in XSLT 2.0


While transforming this element occure multi place <p content-type="paratext">paratext 2_1</p>

XML INPUT:

<?xml version="1.0" encoding="UTF-8"?>
<body>
    <p content-type="heading-01">heading-01</p>
    <p content-type="paratext">paratext 1</p>
    <p content-type="paratext">paratext 1</p>
    <p content-type="heading-01">heading-01</p>
    <p content-type="heading-02">heading-02</p>
    <p content-type="paratext">paratext 2_1</p>
    <p content-type="heading-02">heading-02</p>
    <p content-type="paratext">paratext 2_2</p>
    <p content-type="heading-01">heading-01</p>
    <p content-type="paratext">paratext 3</p>
    <p content-type="paratext">paratext 3</p>
    <p content-type="heading-01">heading-01</p>
    <p content-type="paratext">paratext 4</p>
    <p content-type="paratext">paratext 4</p>
</body>

REQUIRED OUTPUT:

<?xml version="1.0" encoding="UTF-8"?>
<body>
    <sec sec-type="heading-01">
        <title>heading-01</title>
        <p content-type="paratext">paratext 1</p>
        <p content-type="paratext">paratext 1</p>
    </sec>
    <sec sec-type="heading-01">
        <title>heading-01</title>
        <sec sec-type="heading-02">
            <title>heading-02</title>
            <p content-type="paratext">paratext 2_1</p>
        </sec>
        <sec sec-type="heading-02">
            <title>heading-02</title>
            <p content-type="paratext">paratext 2_2</p>
        </sec>
    </sec>
    <sec sec-type="heading-01">
        <title>heading-01</title>
        <p content-type="paratext">paratext 3</p>
        <p content-type="paratext">paratext 3</p>
    </sec>
    <sec sec-type="heading-01">
        <title>heading-01</title>
        <p content-type="paratext">paratext 4</p>
        <p content-type="paratext">paratext 4</p>
    </sec>
</body>

CURRENT OUTPUT:

<?xml version="1.0" encoding="UTF-8"?>
<body>
    <sec sec-type="heading-01">
        <title>heading-01</title>
        <p content-type="paratext">paratext 1</p>
        <p content-type="paratext">paratext 1</p>
    </sec>
    <sec sec-type="heading-01">
        <title>heading-01</title>
        <sec sec-type="heading-02">
            <title>heading-02</title>
            <p content-type="paratext">paratext 2_1</p>
        </sec>
        <p content-type="paratext">paratext 2_1</p>
        <sec sec-type="heading-02">
            <title>heading-02</title>
            <p content-type="paratext">paratext 2_2</p>
        </sec>
        <p content-type="paratext">paratext 2_2</p>
    </sec>
    <sec sec-type="heading-01">
        <title>heading-01</title>
        <p content-type="paratext">paratext 3</p>
        <p content-type="paratext">paratext 3</p>
    </sec>
    <sec sec-type="heading-01">
        <title>heading-01</title>
        <p content-type="paratext">paratext 4</p>
        <p content-type="paratext">paratext 4</p>
    </sec>
</body>

XSLT:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0">

    <!-- Identity template: copies any attribute or node that no more specific template matches. -->
    <xsl:template match="@* | node()">
        <xsl:copy>
            <xsl:apply-templates select="@* | node()"/>
        </xsl:copy>
    </xsl:template>

    <!--
        Rebuilds body: every heading-01 paragraph becomes a sec with a title, and templates
        are applied to all following siblings that come before the next heading-01 or
        referencetitle paragraph.
        NOTE: that selection is not limited to the content directly under the heading-01.
        It also contains the paragraphs that follow a nested heading-02. Those paragraphs are
        emitted once inside the heading-02 sec (by the template below) and once more from
        here via the identity template, which is the duplication shown in CURRENT OUTPUT.
    -->
    <xsl:template match="body">
        <body>
            <xsl:for-each select="p[@content-type = 'heading-01']">
                <sec sec-type="{@content-type}">
                    <title>
                        <xsl:value-of select="."/>
                    </title>
                    <!-- Following siblings minus the next top-level heading and everything after it. -->
                    <xsl:apply-templates select="following-sibling::node() except (following-sibling::p[@content-type = ('heading-01', 'referencetitle')], following-sibling::p[@content-type = ('heading-01', 'referencetitle')]/following-sibling::node())"/>
                </sec>
            </xsl:for-each>
        </body>
    </xsl:template>

    <!--
        Turns a heading-02 paragraph into a nested sec and pulls in the following siblings
        up to the next heading-01, heading-02 or referencetitle paragraph. The caller in the
        body template still visits those same siblings afterwards; nothing here marks them
        as already consumed.
    -->
    <xsl:template match="p[@content-type = 'heading-02']">
        <sec sec-type="{@content-type}">
            <title>
                <xsl:value-of select="."/>
            </title>
            <!-- Following siblings minus the next heading of any handled level and everything after it. -->
            <xsl:apply-templates select="following-sibling::node() except (following-sibling::p[@content-type = ('heading-01', 'heading-02', 'referencetitle')], following-sibling::p[@content-type = ('heading-01', 'heading-02', 'referencetitle')]/following-sibling::node())"/>
        </sec>
    </xsl:template>

</xsl:stylesheet>

Solution

  • The following is XSLT 3 but the used recursive function using for-each-group would work in XSLT 2 as well if you use concat instead of || and xsl:value-of instead of the text value template {.}:

    <?xml version="1.0" encoding="UTF-8"?>
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:mf="http://example.com/mf"
        exclude-result-prefixes="#all"
        expand-text="yes"
        version="3.0">

      <!--
        mf:nest: converts a flat run of sibling elements into nested sec elements.

        Parameters:
          input  the sibling elements to group, in document order. Typed as element()* so
                 that children of body other than p do not raise a type error; only p
                 elements can start a group.
          level  heading depth to group on: 1 groups on content-type "heading-01",
                 2 on "heading-02", and so on.

        Returns: one sec per heading found at this level, holding a title and the
        recursively nested remainder of its group. Elements that come before the first
        heading of this level are passed through the stylesheet's templates instead.

        Recursion stops on its own: a group containing no heading of the requested level
        falls into the otherwise branch, and an empty input yields no groups at all.
      -->
      <xsl:function name="mf:nest" as="element()*">
          <xsl:param name="input" as="element()*"/>
          <xsl:param name="level" as="xs:integer"/>
          <!-- Computed once; used by both the grouping pattern and the branch test. -->
          <xsl:variable name="heading-type" as="xs:string"
              select="'heading-' || format-number($level, '00')"/>
          <xsl:for-each-group select="$input" group-starting-with="p[@content-type = $heading-type]">
              <xsl:choose>
                  <!-- The context item is the first item of the group: is it a heading of this level? -->
                  <xsl:when test="self::p[@content-type = $heading-type]">
                      <sec sec-type="{@content-type}">
                          <title>{.}</title>
                          <!-- The nested result is already final, so it is returned as is
                               rather than being pushed through the templates again. -->
                          <xsl:sequence select="mf:nest(tail(current-group()), $level + 1)"/>
                      </sec>
                  </xsl:when>
                  <xsl:otherwise>
                      <!-- Leading content without a heading of this level: transform each item once. -->
                      <xsl:apply-templates select="current-group()"/>
                  </xsl:otherwise>
              </xsl:choose>
          </xsl:for-each-group>
      </xsl:function>

      <!-- Everything not matched explicitly is copied unchanged. -->
      <xsl:mode on-no-match="shallow-copy"/>

      <xsl:output method="xml" indent="yes"/>

      <!-- Copies body (keeping any attributes it has) and replaces its flat children
           with the nested sec structure, starting at heading level 1. -->
      <xsl:template match="body">
          <xsl:copy>
              <xsl:apply-templates select="@*"/>
              <xsl:sequence select="mf:nest(*, 1)"/>
          </xsl:copy>
      </xsl:template>
    </xsl:stylesheet>
    

    https://xsltfiddle.liberty-development.net/6rewNxB/1

    XSLT 2:

    <?xml version="1.0" encoding="UTF-8" ?>
    <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:mf="http://example.com/mf"
        exclude-result-prefixes="#all">

      <!--
        mf:nest: converts a flat run of sibling elements into nested sec elements.

        Parameters:
          input  the sibling elements to group, in document order. Typed as element()* so
                 that children of body other than p do not raise a type error; only p
                 elements can start a group.
          level  heading depth to group on: 1 groups on content-type "heading-01",
                 2 on "heading-02", and so on.

        Returns: one sec per heading found at this level, holding a title and the
        recursively nested remainder of its group. Elements that come before the first
        heading of this level are passed through the stylesheet's templates instead.

        Recursion stops on its own: a group containing no heading of the requested level
        falls into the otherwise branch, and an empty input yields no groups at all.
      -->
      <xsl:function name="mf:nest" as="element()*">
          <xsl:param name="input" as="element()*"/>
          <xsl:param name="level" as="xs:integer"/>
          <!-- Computed once; used by both the grouping pattern and the branch test. -->
          <xsl:variable name="heading-type" as="xs:string"
              select="concat('heading-', format-number($level, '00'))"/>
          <xsl:for-each-group select="$input" group-starting-with="p[@content-type = $heading-type]">
              <xsl:choose>
                  <!-- The context item is the first item of the group: is it a heading of this level? -->
                  <xsl:when test="self::p[@content-type = $heading-type]">
                      <sec sec-type="{@content-type}">
                          <title>
                              <xsl:value-of select="."/>
                          </title>
                          <!-- Everything after the heading is nested one level deeper. The result
                               is already final, so it is returned as is rather than being pushed
                               through the templates again. -->
                          <xsl:sequence select="mf:nest(subsequence(current-group(), 2), $level + 1)"/>
                      </sec>
                  </xsl:when>
                  <xsl:otherwise>
                      <!-- Leading content without a heading of this level: transform each item once. -->
                      <xsl:apply-templates select="current-group()"/>
                  </xsl:otherwise>
              </xsl:choose>
          </xsl:for-each-group>
      </xsl:function>

      <!-- Identity template: everything not matched explicitly is copied unchanged. -->
      <xsl:template match="@*|node()">
          <xsl:copy>
              <xsl:apply-templates select="@*|node()"/>
          </xsl:copy>
      </xsl:template>

      <xsl:output method="xml" indent="yes"/>

      <!-- Copies body (keeping any attributes it has) and replaces its flat children
           with the nested sec structure, starting at heading level 1. -->
      <xsl:template match="body">
          <xsl:copy>
              <xsl:apply-templates select="@*"/>
              <xsl:sequence select="mf:nest(*, 1)"/>
          </xsl:copy>
      </xsl:template>

    </xsl:transform>
    

    http://xsltransform.net/pNEhB3u