Search code examples
xmlxsltxslt-1.0xslt-grouping

xslt-1.0 creating xml tags with <h1> & <h2>


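I am very new to XSLT. I want to convert the input below into the output shown after it. I want to group the content by its h1 and h2 elements: each h1 should become an element named after the heading text, each h2 should become a Section element with the heading text in a name attribute, and the content between two consecutive h1 or h2 headings should become the content of the corresponding element. Please help. Thanks.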

input.xml

<ATTRIBUTE-VALUE>
    <THE-VALUE>
        <div xmlns="http://www.w3.org/1999/xhtml">
            <!-- Sample input. All headings and body elements are direct children of
                 this div; nothing in the markup nests the p/ul elements under the
                 h1/h2 heading they follow, so grouping has to be derived from
                 sibling order. Note that the "Steps" h1 and both h2 headings
                 carry the same id value. -->
            <h1 dir="ltr" id="_1536217498885">Main Description</h1>
            <p>Line1 The main description text goes here.</p>
            <p>Line2 The main description text goes here.</p>
            <h1 dir="ltr" id="_1536217498886">Steps</h1>
            <h2 dir="ltr" id="_1536217498886">Section Name One</h2>
            <p>Line1 The steps text goes here.</p>
            <ul>Line2 The section description text goes here.</ul>
            <h2 dir="ltr" id="_1536217498886">Section Name Two</h2>
            <p>Line1 The key consideration text goes here.</p>
            <ul><li>Line2 The key consideration text goes here.</li></ul>
            <h1 dir="ltr" id="_1536217498887">Skills</h1>
            <p>Line1 The Skills text goes here.</p>
            <p>Line2 The Skills text goes here.</p>
            <p>Line3 The Skills text goes here.</p>
            <h1 dir="ltr" id="_1536217498888">Synonyms</h1>
            <p>The Synonyms text goes here.</p>
        </div>
    </THE-VALUE>
</ATTRIBUTE-VALUE>

output.xml

<!-- Expected result, shown as a fragment (several sibling elements, no single
     root). Each h1 heading becomes an element named after the heading text with
     the spaces removed, except "Steps", which contributes no element of its own:
     its h2 headings become Section elements carrying the heading text in @name,
     with the following content wrapped in Description. The MainDescription
     content is shown as markup text inside a CDATA section. -->
<MainDescription>
    <![CDATA[
        <p>Line1 The main description text goes here.</p>
        <p>Line2 The main description text goes here.</p>
    ]]>
</MainDescription>
<Section name="Section Name One">
    <Description>
        <p>Line1 The section text goes here.</p>
        <p>Line2 The description text goes here.</p>
    </Description>
</Section>
<Section name="Section Name Two">
    <Description>
        <p>Line1 The description text goes here.</p>
        <ul><li>Line2 The description text goes here.</li></ul>
    </Description>
</Section>
<Skills>
    <p>Line1 The Skills text goes here.</p>
    <p>Line2 The Skills text goes here.</p>
    <p>Line3 The Skills text goes here.</p>
</Skills>
<Synonyms>
    <p>The Synonyms text goes here.</p>
</Synonyms>

I have used the XSL below to group the tags, but I am not able to get the result required by output.xml. If I apply the same template to both h1 and h2, I get results for only h1 or only h2.

xsl file:

<xsl:stylesheet
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xhtml="http://www.w3.org/1999/xhtml"
    xmlns:exsl="http://exslt.org/common"
    exclude-result-prefixes="xhtml exsl"
    version="1.0">

  <!--
    Asker's attempt. Groups the element children of an xhtml:div under their
    nearest preceding h1 and emits one result element per h1. There is no
    template for h2 here, so an h2 inside a group is handled by the identity
    template like any other unmatched node.
  -->

  <!-- The "xml-to-string" mode used in the h1 template is not defined in this
       file; presumably it is supplied by this imported stylesheet (verify). -->
  <xsl:import href="http://lenzconsulting.com/xml-to-string/xml-to-string.xsl"/>

  <!-- Text children of MainDescription and KeyConsideration elements are
       written as CDATA sections. -->
  <xsl:output method="xml" indent="yes"
    cdata-section-elements="MainDescription KeyConsideration"/>
  <!-- Whitespace-only text nodes are removed from every source element. -->
  <xsl:strip-space elements="*"/>

  <!-- Entry point: process the children of the document node. -->
  <xsl:template match="/">
      <xsl:apply-templates/>
  </xsl:template>

  <!-- Identity template: copies any node or attribute that no more specific
       template matches, then recurses into its attributes and children. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Indexes every element child of an xhtml:div, other than h1 itself, by
       the generated id of its nearest preceding h1 sibling. -->
  <xsl:key name="h1-group" match="xhtml:div/*[not(self::xhtml:h1)]" use="generate-id(preceding-sibling::xhtml:h1[1])"/>

  <!-- A div that has h1 children emits nothing itself; only its h1 children
       are processed, and each h1 pulls in its own group through the key. -->
  <xsl:template match="xhtml:div[xhtml:h1]">
      <xsl:apply-templates select="xhtml:h1"/>
  </xsl:template>

  <!-- Creates an element named after the heading text with the spaces removed.
       The group is first built into a result tree fragment, then converted
       back to a node-set with exsl:node-set() and processed in mode
       "xml-to-string" (presumably serialising it as markup text; the mode is
       defined outside this file). This is applied to every h1, not only to
       "Main Description". -->
  <xsl:template match="xhtml:h1">
      <xsl:element name="{translate(., ' ', '')}">
          <xsl:variable name="rtf-with-xhtml-ns-stripped">
              <xsl:apply-templates select="key('h1-group', generate-id())"/>
          </xsl:variable>
          <xsl:apply-templates select="exsl:node-set($rtf-with-xhtml-ns-stripped)/node()" mode="xml-to-string"/>
      </xsl:element>
  </xsl:template>

  <!-- Re-creates an XHTML p as a p literal result element. The stylesheet
       declares no default namespace, so the new element is in no namespace.
       Attributes of the source p are not processed. -->
  <xsl:template match="xhtml:p">
      <p>
          <xsl:apply-templates/>
      </p>
  </xsl:template>

</xsl:stylesheet>

Solution

  • I think you need to define a second key for the h2 grouping

    <!-- Indexes every element child of an xhtml:div, other than h2 itself, by
         the generated id of its nearest preceding h2 sibling. -->
    <xsl:key name="h2-group" match="xhtml:div/*[not(self::xhtml:h2)]" use="generate-id(preceding-sibling::xhtml:h2[1])"/>
    

    And then have a second template that matches h1 elements whose group contains h2 elements

    <!-- Matches an h1 whose h1-group contains at least one h2. Only those h2
         elements are processed, so such an h1 produces no element of its own
         and any group members that are not h2 are not selected here. -->
    <xsl:template match="xhtml:h1[key('h1-group', generate-id())[self::xhtml:h2]]">
      <xsl:apply-templates select="key('h1-group', generate-id())[self::xhtml:h2]" />
    </xsl:template>  
    

    And in the template matching h2 you would do your grouping

    <!-- Emits one Section per h2, with the heading text in @name and the h2's
         own content wrapped in Description. -->
    <xsl:template match="xhtml:h2">
      <!-- All members of the group of the h1 that this h2 belongs to. -->
      <xsl:variable name="h1Group" select="key('h1-group', generate-id(preceding-sibling::xhtml:h1[1]))" />
      <Section name="{.}">
        <Description>
          <!-- Members of this h2's group that are also in $h1Group: a node is
               kept only if adding it to $h1Group leaves the set size unchanged. -->
          <xsl:apply-templates select="key('h2-group', generate-id())[count(. | $h1Group) = count($h1Group)]"/>
       </Description>
      </Section>
    </xsl:template>
    

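    The count(. | $h1Group) = count($h1Group) test is the XSLT 1.0 idiom for node-set intersection: a node passes only if adding it to $h1Group does not enlarge the set, i.e. it is already a member. This ensures you only select the elements following the h2 that are also in the current h1 group, so the selection stops at the next h1.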

    Try this XSLT

    <xsl:stylesheet
        xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xmlns:exsl="http://exslt.org/common"
        exclude-result-prefixes="xhtml exsl"
        version="1.0">
    
      <!--
        Turns the flat h1/h2/p/ul children of an xhtml:div into grouped result
        elements:
          * an h1 whose group has no h2 becomes an element named after the
            heading text, containing the elements that follow it up to the
            next h1;
          * an h1 whose group contains h2 headings produces no element itself;
            each of its h2 headings becomes Section/@name with the following
            content inside Description.
        Nodes outside such a div are copied unchanged by the identity template.
      -->
    
      <!-- NOTE(review): this stylesheet is fetched from a remote URL over plain
           HTTP each time the transform is compiled; consider keeping a reviewed
           local copy and importing that instead. The "xml-to-string" mode used
           in the h1 template is not defined in this file; presumably it comes
           from this import (verify). -->
      <xsl:import href="http://lenzconsulting.com/xml-to-string/xml-to-string.xsl"/>
    
      <!-- Text children of MainDescription and KeyConsideration elements are
           written as CDATA sections. -->
      <xsl:output method="xml" indent="yes" cdata-section-elements="MainDescription KeyConsideration"/>
      <!-- Whitespace-only text nodes are removed from every source element. -->
      <xsl:strip-space elements="*"/>
    
      <!-- Each key indexes the element children of an xhtml:div, other than the
           heading level itself, by the generated id of the nearest preceding
           heading of that level. h2-group therefore also contains elements that
           come after a later h1; the h2 template filters those out. -->
      <xsl:key name="h1-group" match="xhtml:div/*[not(self::xhtml:h1)]" use="generate-id(preceding-sibling::xhtml:h1[1])"/>
      <xsl:key name="h2-group" match="xhtml:div/*[not(self::xhtml:h2)]" use="generate-id(preceding-sibling::xhtml:h2[1])"/>
    
      <!-- Entry point: process the children of the document node. -->
      <xsl:template match="/">
          <xsl:apply-templates/>
      </xsl:template>
    
      <!-- Identity template: copies any node or attribute that no more specific
           template matches, then recurses into its attributes and children. -->
      <xsl:template match="@* | node()">
        <xsl:copy>
          <xsl:apply-templates select="@* | node()"/>
        </xsl:copy>
      </xsl:template>
    
      <!-- A div that has h1 children emits nothing itself; only its h1 children
           are processed, and each h1 pulls in its own group through the key. -->
      <xsl:template match="xhtml:div[xhtml:h1]">
          <xsl:apply-templates select="xhtml:h1"/>
      </xsl:template>
    
      <!--
        Default h1 handling: wrap the h1's group in an element named after the
        heading text.

        The name is the whitespace-normalised heading with spaces, no-break
        spaces and ASCII punctuation that is illegal in an element name removed
        ("Main Description" gives MainDescription). If nothing is left, or the
        result starts with a digit, "-" or ".", an underscore is prefixed so
        that xsl:element always receives a usable name for such input.
        Non-ASCII characters are passed through unchanged, so a heading that
        contains non-ASCII characters which are not name characters can still
        yield an invalid name.
      -->
      <xsl:template match="xhtml:h1">
          <xsl:variable name="non-name-chars"> !"#$%&amp;'()*+,/:;&lt;=&gt;?@[\]^`{|}~&#160;</xsl:variable>
          <xsl:variable name="stripped-name" select="translate(normalize-space(.), $non-name-chars, '')"/>
          <xsl:variable name="element-name">
              <!-- contains() is also true for an empty first character; the
                   explicit empty test is kept for readability. -->
              <xsl:if test="$stripped-name = '' or contains('0123456789-.', substring($stripped-name, 1, 1))">_</xsl:if>
              <xsl:value-of select="$stripped-name"/>
          </xsl:variable>
          <xsl:element name="{$element-name}">
              <xsl:choose>
                <!-- Only the main description is emitted as markup text: the
                     group is built into a result tree fragment, converted back
                     to a node-set and processed in mode "xml-to-string"
                     (presumably a serialiser supplied by the import). -->
                <xsl:when test="normalize-space(.) = 'Main Description'">
                  <xsl:variable name="rtf-with-xhtml-ns-stripped">
                    <xsl:apply-templates select="key('h1-group', generate-id())"/>
                  </xsl:variable>
                  <xsl:apply-templates select="exsl:node-set($rtf-with-xhtml-ns-stripped)/node()" mode="xml-to-string"/>
                </xsl:when>
                <!-- Every other heading keeps its group as real elements. -->
                <xsl:otherwise>
                  <xsl:apply-templates select="key('h1-group', generate-id())"/>
                </xsl:otherwise>
              </xsl:choose>
          </xsl:element>
      </xsl:template>
    
      <!-- An h1 whose group contains at least one h2. The predicate gives this
           pattern a higher default priority than the plain xhtml:h1 template.
           Only the h2 members are processed, so the h1 produces no element of
           its own and group members that are not h2 are not selected here. -->
      <xsl:template match="xhtml:h1[key('h1-group', generate-id())[self::xhtml:h2]]">
        <xsl:apply-templates select="key('h1-group', generate-id())[self::xhtml:h2]" />
      </xsl:template>
    
      <!-- Emits one Section per h2, with the heading text in @name and the h2's
           own content wrapped in Description. -->
      <xsl:template match="xhtml:h2">
        <!-- All members of the group of the h1 that this h2 belongs to. -->
        <xsl:variable name="h1Group" select="key('h1-group', generate-id(preceding-sibling::xhtml:h1[1]))" />
        <Section name="{.}">
          <Description>
            <!-- Intersection of this h2's group with $h1Group: a node is kept
                 only if adding it to $h1Group leaves the set size unchanged,
                 which stops the selection at the next h1. -->
            <xsl:apply-templates select="key('h2-group', generate-id())[count(. | $h1Group) = count($h1Group)]"/>
          </Description>
        </Section>
      </xsl:template>
    
      <!-- Re-creates p, ul and li under their local name. The stylesheet
           declares no default namespace, so the new elements are in no
           namespace. Their attributes are not processed. Other XHTML elements
           are not matched here and are copied by the identity template. -->
      <xsl:template match="xhtml:p|xhtml:ul|xhtml:li">
          <xsl:element name="{local-name()}">
              <xsl:apply-templates/>
          </xsl:element>
      </xsl:template>
    </xsl:stylesheet>