Search code examples
tree, xslt-2.0

XML flat numbered index to real tree structure - XSLT


XML flat numbered index to real tree structure - XSLT

I am trying to make an index tree with numbered sections logically nested within each other. I am using XSLT 2.0 and have been trying to use for-each-group ... group-starting-with, to little avail.

Here is a sample input XML:

<DOC>
  <section class="AB">
    <h1>Real section header</h1>
    <p><b>1. heading</b></p>
    <p>Some text here.</p>
    <p>More text.</p>
    <p><b>1.1. setting</b></p>
    <p>More words.</p>
    <p><b>1.2. fremmer</b></p>
    <p><b>1.2.1. point</b></p>
    <p>We are sailing.</p>
    <p>Whisky in the jar.</p>
    <p><b>1.2.2.</b></p>
    <p>Johnny is the man.</p>
    <p><b>1.2.3.</b></p>
    <p>And we go on and on.</p>
    <ul>
      <li>List item one</li>
      <li>List item two</li>
      <li>List item three</li>
    </ul>
    <p><b>2. Another heading</b></p>
    <p>Here is the accompanying text.</p>
    <table>
      <tr>
        <td>1</td>
        <td>Bla bla bla.</td>
      </tr>
      <tr>
        <td>2</td>
        <td>BlaX bla bla.</td>
      </tr>
      <tr>
        <td>3</td>
        <td>BlaY bla bla.</td>
      </tr>
    </table>
    <p><b>3. Last heading</b></p>
    <p>Here is the accompanying text right now.</p>
  </section>
</DOC>

And this is what the output should be:

<DOC>
  <section class="AB">
    <h1>Real section header</h1>
    <section>
      <h1>1. heading</h1>
      <p>Some text here.</p>
      <p>More text.</p>
      <section>
        <h1>1.1. setting</h1>
        <p>More words.</p>
      </section>
      <section>
        <h1>1.2. fremmer</h1>
        <section>
          <h1>1.2.1. point</h1>
          <p>We are sailing.</p>
          <p>Whisky in the jar.</p>
        </section>
        <section>
          <h1>1.2.2.</h1>
          <p>Johnny is the man.</p>
        </section>
        <section>
          <h1>1.2.3.</h1>
          <p>And we go on and on.</p>
          <ul>
            <li>List item one</li>
            <li>List item two</li>
            <li>List item three</li>
          </ul>
        </section>
      </section>
    </section>
    <section>
      <h1>2. Another heading</h1>
      <p>Here is the accompanying text.</p>
      <table>
        <tr>
          <td>1</td>
          <td>Bla bla bla.</td>
        </tr>
        <tr>
          <td>2</td>
          <td>BlaX bla bla.</td>
        </tr>
        <tr>
          <td>3</td>
          <td>BlaY bla bla.</td>
        </tr>
      </table>
    </section>
    <section>
      <h1>3. Last heading</h1>
      <p>Here is the accompanying text right now.</p>
    </section>
  </section>
</DOC>


Solution

  • In the end it looks as if your input doesn't quite match the suggestion from my comment, because not every inner p/b element has a space and text after the heading number. You can of course use a different test, e.g.:

    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:mf="http://example.com/mf"
        exclude-result-prefixes="#all"
        version="3.0">

      <!--
        Rebuilds a flat run of numbered heading paragraphs (a p holding a b
        such as "1.", "1.1." or "1.2.3. title") plus the siblings that follow
        them into nested section elements. Everything else is copied as is.
      -->

      <!--
        mf:heading-level
          $label  string value of a b element.
        Returns the number of periods in the leading run of digits and
        periods ("1." gives 1, "1.2.3. title" gives 3). Returns 0 when the
        text does not start with a digit, so plain bold text is never taken
        for a heading. Text after the number label is ignored, which allows
        titles that themselves contain digits or periods.
      -->
      <xsl:function name="mf:heading-level" as="xs:integer">
        <xsl:param name="label" as="xs:string"/>
        <!-- The 's' flag lets '.*' swallow line breaks inside the title. -->
        <xsl:sequence select="
            if (matches($label, '^\s*[0-9]'))
            then string-length(translate(replace($label, '^\s*([0-9][0-9.]*).*$', '$1', 's'), '0123456789', ''))
            else 0"/>
      </xsl:function>

      <!--
        mf:group
          $nodes  sibling nodes to arrange, in document order.
          $level  heading depth to split on; the section template below
                  starts at 1.
        Each group that starts with a heading of depth $level becomes a
        section: the heading itself is transformed, then the rest of the
        group is grouped again one level deeper. A group that does not start
        with such a heading (content before the first heading, or content
        with no deeper headings) is simply passed to the templates.
      -->
      <xsl:function name="mf:group" as="node()*">
        <xsl:param name="nodes" as="node()*"/>
        <xsl:param name="level" as="xs:integer"/>
        <xsl:for-each-group select="$nodes"
            group-starting-with="p[b[mf:heading-level(string(.)) = $level]]">
          <xsl:choose>
            <!-- The context item is the first node of the current group. -->
            <xsl:when test="self::p[b[mf:heading-level(string(.)) = $level]]">
              <section>
                <xsl:apply-templates select="."/>
                <xsl:sequence select="mf:group(tail(current-group()), $level + 1)"/>
              </section>
            </xsl:when>
            <xsl:otherwise>
              <xsl:apply-templates select="current-group()"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each-group>
      </xsl:function>

      <!-- A section with p/b children: keep its attributes, regroup its content. -->
      <xsl:template match="section[p/b]">
        <xsl:copy>
          <xsl:apply-templates select="@*"/>
          <xsl:sequence select="mf:group(node(), 1)"/>
        </xsl:copy>
      </xsl:template>

      <!-- Any p with a b child inside a section is emitted as an h1. -->
      <xsl:template match="section/p[b]">
        <h1>
          <xsl:apply-templates/>
        </h1>
      </xsl:template>

      <!-- Drop the b wrapper and keep only its content. -->
      <xsl:template match="section/p/b">
        <xsl:apply-templates/>
      </xsl:template>

      <!-- Nodes without a matching template are copied unchanged. -->
      <xsl:mode on-no-match="shallow-copy"/>

      <xsl:output method="xml" indent="yes" html-version="5"/>
      <xsl:strip-space elements="*"/>

    </xsl:stylesheet>
    

    That is XSLT 3; for XSLT 2.0 you would need to replace the xsl:mode declaration with an explicit identity template, e.g.

    <!-- Identity rule: copy each node or attribute, then process its attributes and children. -->
    <xsl:template match="node() | @*">
      <xsl:copy>
        <xsl:apply-templates select="node() | @*"/>
      </xsl:copy>
    </xsl:template>
    

    and use subsequence(current-group(), 2) instead of tail(current-group()).