Search code examples
xml, xslt-2.0, xslt-grouping

xsl:for-each-group is not working as expected in XSLT 2.0


We have an ongoing project in which XSLT is used to convert OpenXML to a custom XML format. Until now we always received flat, single-level section content, but now some nested sections are coming in. I have tried many things with for-each-group to achieve the desired result, but I am stuck on the nesting part. I am looking for help understanding how grouping works here. Here is what I have tried so far: http://xsltransform.net/pPqteBi

Sample XML:

<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<w:body xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
    <w:p>
        <w:pPr>
            <w:pStyle w:val="TRH1"/>
        </w:pPr>
        <w:r>
            <w:t>Heading 1 Title</w:t>
        </w:r>
    </w:p>
    <w:p>
        <w:pPr>
            <w:pStyle w:val="TRSubtitle1"/>
        </w:pPr>
        <w:r>
            <w:t>section heading 1</w:t>
        </w:r>
    </w:p>
    <w:p>
        <w:pPr>
            <w:pStyle w:val="TRNormal"/>
        </w:pPr>
        <w:r>
            <w:t>section 1 First para</w:t>
        </w:r>
    </w:p>

    <w:p>
        <w:pPr>
            <w:pStyle w:val="TRNormal"/>
        </w:pPr>
        <w:r>
            <w:t>section 1 2 para</w:t>
        </w:r>
    </w:p>

    <w:p>
        <w:pPr>
            <w:pStyle w:val="TRSubtitle2"/>
        </w:pPr>
        <w:r>
            <w:t>Section title 1.1</w:t>
        </w:r>
    </w:p>
    <w:p>
        <w:pPr>
            <w:pStyle w:val="TRNormal"/>
        </w:pPr>
        <w:r>
            <w:t>First para of section 1.1 </w:t>
        </w:r>
    </w:p>
    <w:table>table</w:table>
    <w:p>
        <w:pPr>
            <w:pStyle w:val="TRSubtitle1"/>
        </w:pPr>
        <w:r>
            <w:t>section title 2</w:t>
        </w:r>
    </w:p>
    <w:p>
        <w:pPr>
            <w:pStyle w:val="TRNormal"/>
        </w:pPr>
        <w:r>
            <w:t>section 2 First para</w:t>
        </w:r>
    </w:p>

</w:body>

Modified XSL:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:pkg="http://schemas.microsoft.com/office/2006/xmlPackage"
    xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas"
    xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex"
    xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex"
    xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex"
    xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex"
    xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex"
    xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    xmlns:o="urn:schemas-microsoft-com:office:office"
    xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
    xmlns:v="urn:schemas-microsoft-com:vml"
    xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing"
    xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
    xmlns:w10="urn:schemas-microsoft-com:office:word"
    xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"
    xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"
    xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
    xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup"
    xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk"
    xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape"
    xmlns:random="java:java.util.Random" xmlns:mf="http://example.com/mf"
    exclude-result-prefixes="#all" version="2.0">

    <xsl:output indent="yes" method="xml" encoding="UTF-8"/>

    <xsl:strip-space elements="*"/>

    <!-- Produce no output for any pkg:part that has no w:document descendant. -->
    <xsl:template match="pkg:part[not(descendant::w:document)]"/>

    <!-- Suppress w:hyperlink elements (agreed in discussion with Srikanth): footnotes were printing numbers in these tags. -->
    <xsl:template match="w:hyperlink"/>

    <!--
        Converts w:body into a chapter-poc element.
        The non-empty child elements of w:body are split into groups, a new group
        starting at every paragraph whose style is TRSubtitle1. Elements before the
        first such paragraph form a leading group of their own.
    -->
    <xsl:template match="w:body">
        <chapter-poc>
            <ctitle>XXX XXX</ctitle>
            <xsl:for-each-group select="*[normalize-space()]"
                group-starting-with="w:p[w:pPr/w:pStyle[@w:val='TRSubtitle1']]">
                <xsl:choose>
                    <!-- A group that contains a TRH1 paragraph becomes a sub-chapter.
                         apply-templates has no select, so only the children of the context item
                         (the first element of the group) are processed here. -->
                    <xsl:when test="current-group()[self::w:p[w:pPr/w:pStyle[@w:val='TRH1']]]">
                        <sub-chapter>
                            <sctitle>
                                <xsl:apply-templates/>
                            </sctitle>
                        </sub-chapter>
                    </xsl:when>
                    <!-- Every other group is written as one flat section. -->
                    <xsl:otherwise>
                        <section>
                            <xsl:for-each select="current-group()">

                                <xsl:choose>
                                    <!-- TRSubtitle1 and TRSubtitle2 paragraphs both become stitle inside the same
                                         section; a TRSubtitle2 paragraph does not open a nested section. -->
                                    <xsl:when
                                        test="self::w:p[w:pPr/w:pStyle[@w:val=('TRSubtitle1','TRSubtitle2')]]">
                                        <stitle>
                                            <xsl:apply-templates/>
                                        </stitle>
                                    </xsl:when>
                                    <!-- Any other paragraph becomes a para. num is one more than the number of
                                         preceding sibling w:p elements that are not TRH1, TRSubtitle1 or
                                         TRSubtitle2, so numbering continues across sections. -->
                                    <xsl:when test="self::w:p">
                                        <para
                                            num="{count(preceding-sibling::w:p[not(w:pPr/w:pStyle[@w:val='TRH1' or @w:val='TRSubtitle1' or @w:val='TRSubtitle2'])])+1}">
                                            <xsl:apply-templates/>
                                        </para>
                                    </xsl:when>

                                    <xsl:otherwise>
                                        <!-- Any other element (e.g. a table) is shallow-copied without unused namespace nodes and templates are applied to its children. The element's attributes are not copied. -->
                                        <xsl:copy copy-namespaces="no">
                                            <xsl:apply-templates/>
                                        </xsl:copy>
                                    </xsl:otherwise>
                                </xsl:choose>
                            </xsl:for-each>

                        </section>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:for-each-group>
        </chapter-poc>
    </xsl:template>

</xsl:stylesheet>

Current output:

<?xml version="1.0" encoding="UTF-8"?>
<chapter-poc>
   <ctitle>XXX XXX</ctitle>
   <sub-chapter>
      <sctitle>Heading 1 Title</sctitle>
   </sub-chapter>
   <section>
      <stitle>section heading 1</stitle>
      <para num="1">section 1 First para</para>
      <para num="2">section 1 2 para</para>
      <stitle>Section title 1.1</stitle>
      <para num="3">First para of section 1.1 </para>
      <w:table xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">table</w:table>
   </section>
   <section>
      <stitle>section title 2</stitle>
      <para num="4">section 2 First para</para>
   </section>
</chapter-poc>

Desired output:

<?xml version="1.0" encoding="UTF-8"?>
<chapter-poc>
   <ctitle>XXX XXX</ctitle>
   <sub-chapter>
      <sctitle>Heading 1 Title</sctitle>
   </sub-chapter>
   <section>
      <stitle>section heading 1</stitle>
      <para num="1">section 1 First para</para>
      <para num="2">section 1 2 para</para>
      <section>
         <stitle>Section title 1.1</stitle>
         <para num="3">First para of section 1.1 </para>
         <w:table xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">table</w:table>
      </section>
   </section>
   <section>
      <stitle>section title 2</stitle>
      <para num="4">section 2 First para</para>
   </section>
</chapter-poc>

Solution

  • It is possible to use grouping in a recursive function:

    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        xmlns:mf="http://example.com/mf"
        exclude-result-prefixes="#all"
        version="3.0">
      
      <!--
        mf:group-sections nests a flat sequence of elements into section wrappers.
          $elements  the elements to group
          $index     the heading level; a group starts at each w:p whose style value
                     is 'TRSubtitle' followed by this number
        A group that starts with such a heading is wrapped in a section element and
        grouped again at the next level ($index + 1). Any group that does not start
        with a heading is passed to the templates unchanged.
      -->
      <xsl:function name="mf:group-sections" as="node()*">
        <xsl:param name="elements" as="element(*)*"/>
        <xsl:param name="index" as="xs:integer"/>
        <!-- Style name that opens a section at this level, e.g. TRSubtitle1. -->
        <xsl:variable name="heading-style" as="xs:string" select="'TRSubtitle' || $index"/>
        <xsl:for-each-group select="$elements"
            group-starting-with="w:p[w:pPr/w:pStyle/@w:val = $heading-style]">
          <xsl:choose>
            <!-- The context item is the first element of the current group. -->
            <xsl:when test="not(self::w:p[w:pPr/w:pStyle/@w:val = $heading-style])">
              <xsl:apply-templates select="current-group()"/>
            </xsl:when>
            <xsl:otherwise>
              <section>
                <xsl:sequence select="mf:group-sections(current-group(), $index + 1)"/>
              </section>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each-group>
      </xsl:function>
      
      <xsl:output indent="yes" method="xml" encoding="UTF-8"/>
    
      <xsl:strip-space elements="*"/>
    
      <!-- Produce no output for paragraph properties; without this rule the catch-all element template would copy w:pPr into the result. -->
      <xsl:template match="w:pPr"/>
    
      <!-- Wraps the body in chapter-poc and nests its non-empty child elements into sections, starting at heading level 1. -->
      <xsl:template match="w:body">
        <xsl:variable name="non-empty-children" as="element()*" select="*[normalize-space()]"/>
        <chapter-poc>
          <ctitle>XXX XXX</ctitle>
          <xsl:sequence select="mf:group-sections($non-empty-children, 1)"/>
        </chapter-poc>
      </xsl:template>
      
      <!-- Section headings: a paragraph styled TRSubtitle1 to TRSubtitle6 becomes an stitle element. -->
      <xsl:template match="w:p[w:pPr/w:pStyle/@w:val = ((1 to 6) ! ('TRSubtitle' || .))]">
        <stitle>
          <xsl:apply-templates select="node()"/>
        </stitle>
      </xsl:template>
      
      <!-- Catch-all for elements without a more specific rule (e.g. w:table): shallow-copy the element without unused namespace nodes and process its children. Attributes are not copied. -->
      <xsl:template match="element()">
        <xsl:copy copy-namespaces="no">
          <xsl:apply-templates select="node()"/>
        </xsl:copy>
      </xsl:template>
      
      <!--
        Default rule for paragraphs: emit a para element whose num attribute is the
        paragraph's position among its sibling non-heading paragraphs.
        Heading paragraphs are handled by more specific templates (TRH1 and
        TRSubtitle1 to TRSubtitle6), so the count excludes that same set of styles.
        Excluding only TRSubtitle1 and TRSubtitle2 would let deeper headings
        consume a number and leave gaps in the para numbering.
      -->
      <xsl:template match="w:p">
        <xsl:variable name="p-num" as="xs:integer">
          <xsl:number count="w:p[not(w:pPr/w:pStyle[@w:val = ('TRH1', (1 to 6)!('TRSubtitle' || .))])]"/>
        </xsl:variable>
        <para num="{$p-num}">
          <xsl:apply-templates/>
        </para>
      </xsl:template>
      
      <!-- Chapter heading: a paragraph styled TRH1 becomes a sub-chapter holding a single sctitle. -->
      <xsl:template match="w:p[w:pPr/w:pStyle/@w:val = 'TRH1']">
        <sub-chapter>
          <sctitle>
            <xsl:apply-templates select="node()"/>
          </sctitle>
        </sub-chapter>
      </xsl:template>
      
      <!-- Runs and text wrappers are transparent: emit nothing for the element itself and continue with its children. -->
      <xsl:template match="w:t | w:r">
        <xsl:apply-templates select="node()"/>
      </xsl:template>
    
    </xsl:stylesheet>
    

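    I have made use of some XPath 3 constructs, such as the || string concatenation operator and the ! simple map operator. If you use an XSLT 2 processor, use the concat function instead of || and a for ... return expression instead of !.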