Search code examples
xslt-2.0

Making a flat structure with non-consecutive heading levels hierarchical


I want to group content by headings, but when the heading levels are not consecutive (for example, an <h2> followed directly by an <h4>) the sectioning stops; see the example below. I would like the <h4> to be wrapped in its own section even when there is no <h3>, but I can't get it to work. Could someone help me with that? I am using XSLT 2.0.

Input:


    <?xml version="1.0" encoding="UTF-8"?>
    <document id="21" state="Schrijven" documentTypeName="News">
            <document track-changes="false" version="1">
            <section>
                    <h1>Title h1</h1>
                        <p>Some text</p>
                        <p/>
                    <h2>Title h2</h2>
                       <p>Some text</p>
                       <p/>
                    <h3>Title h3</h3>
                       <p>Some text</p>
                       <p/>
                    <h2>Title h2</h2>
                       <p>Some text</p>
                       <p/>
                    <h4>Title h4</h4>
                       <p>Some text</p>
                       <p/>   
            </section>        
            </document>
        </document>

Current output


    <html>
       <head>
          <meta charset="UTF-8"/>
        </head>
       <body data-sws-documentkey="id-00021">
          <section class="level-newsitem">
             <h1 class="nieuws">Title h1</h1>
             <p>Some text</p>
             <p/>
             <section class="level">
                <h2>Title h2</h2>
                <p>Some text</p>
                <p/>
                <section class="level">
                   <h3>Title h3</h3>
                   <p>Some text</p>
                   <p/>
                </section>
             </section>
             <section class="level">
                <h2>Title h2</h2>
                <p>Some text</p>
                <p/>
                <h4>Title h4</h4>
                <p>Some text</p>
                <p/>
             </section>
          </section>
       </body>
    </html>

Desired Output


    <html>
       <head>
          <meta charset="UTF-8"/>
        </head>
       <body data-sws-documentkey="id-00021">
          <section class="level-newsitem">
             <h1 class="nieuws">Title h1</h1>
             <p>Some text</p>
             <p/>
             <section class="level">
                <h2>Title h2</h2>
                <p>Some text</p>
                <p/>
                <section class="level">
                   <h3>Title h3</h3>
                   <p>Some text</p>
                   <p/>
                </section>
             </section>
             <section class="level">
                <h2>Title h2</h2>
                <p>Some text</p>
                <p/>
                <section class="level">
                   <h4>Title h4</h4>
                   <p>Some text</p>
                   <p/>
                </section>
             </section>
          </section>
       </body>
    </html>

Stylesheet


    <?xml version="1.0" encoding="UTF-8"?>

    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:cwc="urn:cwc"
        xmlns:p1="urn:p1"
        exclude-result-prefixes="xs cwc p1"
        version="2.0">

    <xsl:output method="xml" indent="yes" encoding="UTF-8"/>
    <xsl:strip-space elements="document meta section div row"/>

    <!-- Key written to body/@data-sws-documentkey: the literal prefix 'id-000'
         followed by the id attribute of the outermost document element.
         Bound with a select attribute so the variable holds a plain string
         rather than a temporary tree wrapping a text node. -->
    <xsl:variable name="DocumentKey" select="concat('id-000', /document[1]/@id)"/>

    <!-- Document type name read from the outermost document element
         (compared against 'Note' and 'News' elsewhere in this stylesheet).
         string() yields '' when the attribute is absent, and binding with a
         select attribute avoids building a temporary tree for a simple value. -->
    <xsl:variable name="DocSoort" select="string(/document/@documentTypeName)"/>

    <!-- Builds the html/head/body shell. The description meta element is only
         written when meta/description exists. The children are then processed
         in mode Note or News depending on $DocSoort; for any other value the
         body stays empty. -->
    <xsl:template match="document">
        <html>
            <head>
                <meta charset="UTF-8"/>
                <xsl:if test="meta/description">
                    <meta name="description" content="{meta/description}"/>
                </xsl:if>
                <title>
                    <xsl:value-of select="naam"/>
                </title>
            </head>
            <body data-sws-documentkey="{$DocumentKey}">
                <xsl:choose>
                    <xsl:when test="$DocSoort = 'Note'">
                        <xsl:apply-templates mode="Note"/>
                    </xsl:when>
                    <xsl:when test="$DocSoort = 'News'">
                        <xsl:apply-templates mode="News"/>
                    </xsl:when>
                </xsl:choose>
            </body>
        </html>
    </xsl:template>

    <!-- Copies an h1 in every mode. The first h1 of the document (no h1 on the
         preceding axis) gets a class attribute derived from $DocSoort
         ('noteitem', 'newsitem', or empty for other types) and any class
         attribute of its own is skipped; later h1 elements have all their
         attributes processed as they are. -->
    <xsl:template match="h1" mode="#all">
        <xsl:copy>
            <xsl:choose>
                <xsl:when test="not(preceding::h1)">
                    <xsl:attribute name="class"
                        select="if ($DocSoort = 'Note') then 'noteitem'
                                else if ($DocSoort = 'News') then 'newsitem'
                                else ''"/>
                    <xsl:apply-templates select="@*[name() != 'class']" mode="#current"/>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:apply-templates select="@*" mode="#current"/>
                </xsl:otherwise>
            </xsl:choose>
            <xsl:apply-templates mode="#current"/>
        </xsl:copy>
    </xsl:template>

    <!-- Empty template: naam, meta and lastmodified elements produce no output in any mode. -->
    <xsl:template match="naam|meta|lastmodified" mode="#all"/>

    <!-- Handles a document element that has a parent element. When it already
         contains section children, those children are processed in the
         current mode. Otherwise its content is wrapped in a new section whose
         class is 'sws-' plus the lower-cased document type, and the headings
         are grouped by the groupingheaders template. -->
    <xsl:template match="*/document" mode="#all">
        <xsl:choose>
            <xsl:when test="section">
                <xsl:apply-templates mode="#current"/>
            </xsl:when>
            <xsl:otherwise>
                <section class="{lower-case(concat('sws-', $DocSoort))}">
                    <xsl:call-template name="groupingheaders"/>
                </section>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

    <!-- Rewrites a source section as a section whose class is 'level-' plus
         the lower-cased document type, then groups its children by heading
         through the groupingheaders template. -->
    <xsl:template match="section" mode="#all">
        <section class="{lower-case(concat('level-', $DocSoort))}">
            <xsl:call-template name="groupingheaders"/>
        </section>
    </xsl:template>

    <!-- First grouping level. Splits the element children of the context node
         into groups, each starting at an h2 that has non-whitespace text (for
         Note documents a div with class 'letop' also starts a group). A group
         that really starts with such an element is wrapped in
         section class="level" and handed to group2 for the next level; the
         leading group before the first such element is processed as is.
         For any other $DocSoort value nothing is produced. -->
    <xsl:template name="groupingheaders">
        <xsl:choose>
            <xsl:when test="$DocSoort='Note'">
                <xsl:for-each-group select="*" group-starting-with="div[@class = 'letop'][parent::document or parent::section]|h2[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
                    <xsl:choose>
                        <!-- The first group may start with any element, so re-check that this one begins with a group starter. -->
                        <xsl:when test="current-group()[1]/self::div[@class = 'letop'][parent::document or parent::section]|current-group()[1]/self::h2[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
                            <section class="level">
                                <xsl:call-template name="group2"/>
                            </section>
                        </xsl:when>
                        <xsl:otherwise>
                            <xsl:apply-templates select="current-group()" mode="#current"/>
                        </xsl:otherwise>
                    </xsl:choose>
                </xsl:for-each-group>
            </xsl:when>
            <xsl:when test="$DocSoort='News'">
                <!-- Same grouping as for Note, except that only h2 starts a group. -->
                <xsl:for-each-group select="*" group-starting-with="h2[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
                    <xsl:choose>
                        <xsl:when test="current-group()[1]/self::h2[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
                            <section class="level">
                                <xsl:call-template name="group2"/>
                            </section>
                        </xsl:when>
                        <xsl:otherwise>
                            <xsl:apply-templates select="current-group()" mode="#current"/>
                        </xsl:otherwise>
                    </xsl:choose>
                </xsl:for-each-group>
            </xsl:when>
        </xsl:choose>
    </xsl:template>

    <!-- Second grouping level. Regroups the caller's current-group() at each
         h3 that has non-whitespace text. A group starting with such an h3 is
         wrapped in section class="level" and passed on to group3.
         Limitation: a group that does not start with an h3 is sent straight
         to apply-templates and never reaches group3, so a deeper heading such
         as an h4 that directly follows an h2 is copied without a section of
         its own. -->
    <xsl:template name="group2">
        <xsl:for-each-group select="current-group()" group-starting-with="h3[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
            <xsl:choose>
                <xsl:when test="current-group()[1]/self::h3[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
                    <section class="level">
                        <xsl:call-template name="group3"/>
                    </section>
                </xsl:when>
                <xsl:otherwise>
                    <!-- No h3 at the start of this group: output it flat, without looking for h4..h6. -->
                    <xsl:apply-templates select="current-group()" mode="#current"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:for-each-group>
    </xsl:template>

    <!-- Third grouping level. Regroups the caller's current-group() at each h4
         that has non-whitespace text. A group starting with such an h4 is
         wrapped in section class="level" and passed on to group4; any other
         group is processed as is, without descending to deeper levels. -->
    <xsl:template name="group3">
        <xsl:for-each-group select="current-group()" group-starting-with="h4[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
            <xsl:choose>
                <xsl:when test="current-group()[1]/self::h4[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
                    <section class="level">
                        <xsl:call-template name="group4"/>
                    </section>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:apply-templates select="current-group()" mode="#current"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:for-each-group>
    </xsl:template>

    <!-- Fourth grouping level. Regroups the caller's current-group() at each
         h5 that has non-whitespace text. A group starting with such an h5 is
         wrapped in section class="level" and passed on to group5; any other
         group is processed as is, without descending to deeper levels. -->
    <xsl:template name="group4">
        <xsl:for-each-group select="current-group()" group-starting-with="h5[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
            <xsl:choose>
                <xsl:when test="current-group()[1]/self::h5[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
                    <section class="level">
                        <xsl:call-template name="group5"/>
                    </section>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:apply-templates select="current-group()" mode="#current"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:for-each-group>
    </xsl:template>

    <!-- Last grouping level. Regroups the caller's current-group() at each h6
         that has non-whitespace text. A group starting with such an h6 is
         wrapped in section class="level"; in both branches the group members
         are processed with apply-templates, with no further regrouping. -->
    <xsl:template name="group5">
        <xsl:for-each-group select="current-group()" group-starting-with="h6[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
            <xsl:choose>
                <xsl:when test="current-group()[1]/self::h6[parent::document or parent::section][count(text()[normalize-space()]) > 0]">
                    <section class="level">
                        <xsl:apply-templates select="current-group()" mode="#current"/>
                    </section>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:apply-templates select="current-group()" mode="#current"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:for-each-group>
    </xsl:template>

    <!-- Identity copy for everything no other template handles, in every mode.
         Attributes are matched and recursed into as well: with node() alone,
         the attributes of copied elements were dropped, and the explicit
         apply-templates over @* in the h1 template fell through to the
         built-in attribute rule, which writes the attribute value as text. -->
    <xsl:template match="@*|node()" mode="#all">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()" mode="#current"/>
        </xsl:copy>
    </xsl:template>


    </xsl:stylesheet>


Solution

  • Grouping that also copes with skipped heading levels can be done with a recursive function:

    <?xml version="1.0" encoding="UTF-8"?>
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:mf="http://example.com/mf"
        exclude-result-prefixes="xs mf"
        version="2.0">
    
        <xsl:output method="html" indent="yes"/>
    
        <!-- Name prefix of the heading elements: mf:group treats an element as a
             heading of level N when its local name is this prefix followed by N. -->
        <xsl:param name="prefix" as="xs:string" select="'h'"/>
        <!-- While the level being examined is below this value, mf:group retries
             a group that has no heading at that level one level deeper. -->
        <xsl:param name="max-level" as="xs:integer" select="6"/>
    
        <!-- Recursively nests a flat node sequence into section elements.
             $nodes: the nodes to group.
             $level: the heading level to look for in this pass.
             The nodes are split at every element whose local name is $prefix
             followed by $level. A group that starts with such a heading becomes
             a section class="level" holding the processed heading and the rest
             of the group regrouped one level deeper. A group without a heading
             at this level is retried one level deeper while $level is below
             $max-level, which is what lets skipped levels still be sectioned;
             beyond that its nodes are simply passed to apply-templates. -->
        <xsl:function name="mf:group" as="node()*">
            <xsl:param name="nodes" as="node()*"/>
            <xsl:param name="level" as="xs:integer"/>
            <xsl:variable name="heading-name" as="xs:string" select="concat($prefix, $level)"/>
            <xsl:for-each-group select="$nodes" group-starting-with="*[local-name() eq $heading-name]">
                <xsl:choose>
                    <!-- The context item is the first node of the group. -->
                    <xsl:when test="self::*[local-name() eq $heading-name]">
                        <section class="level">
                            <xsl:apply-templates select="."/>
                            <xsl:sequence select="mf:group(current-group() except ., $level + 1)"/>
                        </section>
                    </xsl:when>
                    <xsl:when test="$level lt $max-level">
                        <xsl:sequence select="mf:group(current-group(), $level + 1)"/>
                    </xsl:when>
                    <xsl:otherwise>
                        <xsl:apply-templates select="current-group()"/>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:for-each-group>
        </xsl:function>
    
        <!-- Identity transformation: copies each attribute and node, then
             processes the attributes and children of the copied node. -->
        <xsl:template match="node() | @*">
            <xsl:copy>
                <xsl:apply-templates select="@*"/>
                <xsl:apply-templates select="node()"/>
            </xsl:copy>
        </xsl:template>
        <!-- Matches any element that has at least one heading child and replaces
             its element children with the nested sections built by mf:group,
             starting at level 1.
             The heading test is derived from $prefix and $max-level, so it stays
             in step with mf:group when those parameters are overridden; with the
             defaults it covers h1 to h6.
             The container's own attributes are copied first; a bare xsl:copy
             would silently drop them. -->
        <xsl:template match="*[*[local-name() = (for $i in 1 to $max-level return concat($prefix, $i))]]">
            <xsl:copy>
                <xsl:apply-templates select="@*"/>
                <xsl:sequence select="mf:group(*, 1)"/>
            </xsl:copy>
        </xsl:template>
    
    </xsl:stylesheet>
    

    That transforms

    <document id="21" state="Schrijven" documentTypeName="News">
        <document track-changes="false" version="1">
            <section>
                <h1>Title h1</h1>
                <p>Some text</p>
                <p/>
                <h2>Title h2</h2>
                <p>Some text</p>
                <p/>
                <h3>Title h3</h3>
                <p>Some text</p>
                <p/>
                <h2>Title h2</h2>
                <p>Some text</p>
                <p/>
                <h4>Title h4</h4>
                <p>Some text</p>
                <p/>   
            </section>        
        </document>
    </document>
    

    into

    <document id="21" state="Schrijven" documentTypeName="News">
    
       <document track-changes="false" version="1">
    
          <section>
             <section class="level">
                <h1>Title h1</h1>
                <p>Some text</p>
                <p></p>
                <section class="level">
                   <h2>Title h2</h2>
                   <p>Some text</p>
                   <p></p>
                   <section class="level">
                      <h3>Title h3</h3>
                      <p>Some text</p>
                      <p></p>
                   </section>
                </section>
                <section class="level">
                   <h2>Title h2</h2>
                   <p>Some text</p>
                   <p></p>
                   <section class="level">
                      <h4>Title h4</h4>
                      <p>Some text</p>
                      <p></p>
                   </section>
                </section>
             </section>
          </section>        
    
       </document>
    
    </document>
    

    You still need to add your own templates for the other elements. If the first level needs special treatment, either extend the function with conditional checks or write a separate template or function for that level.