Search code examples
xslt, xslt-grouping

Removing duplicate XML nodes in XSLT


I would appreciate it if someone could help me create an XSLT stylesheet that removes duplicate nodes from an XML document, based on the value of a child element (PlayBack--ControlInfo-ControlName).

Within each GStep, I want to keep only the first Step for each distinct PlayBack--ControlInfo-ControlName value and remove the later Step elements that repeat it.

Input XML

<?xml version="1.0" encoding="utf-8"?>
      <Document>
       <Meta>
         <GpsFile>notepad_may_30_file</GpsFile>
         <GpsId>36fa4fe8-9691-4a7f-8bc1-9543f6b7d29a</GpsId>
          <ExePath>
             <ExePath1>C:\WINDOWS\SYSTEM32\notepad.exe</ExePath1>
          </ExePath>
        </Meta>
         <Process>
            <GStep DialogName="Untitled - Notepad">
             <Step DialogName="Untitled - Notepad">
              <Step-ID>3</Step-ID>     
              <PlayBack--ControlInfo-ControlName />      
            </Step>
            <Step DialogName="Untitled - Notepad">
              <Step-ID>4</Step-ID>     
              <PlayBack--ControlInfo-ControlName />     
            </Step>
            <Step DialogName="Untitled - Notepad">
              <Step-ID>5</Step-ID>      
              <PlayBack--ControlInfo-ControlName>Edit</PlayBack--ControlInfo-ControlName>     
            </Step>
            <Step DialogName="Untitled - Notepad">
              <Step-ID>6</Step-ID>      
              <PlayBack--ControlInfo-ControlName>Replace...\tCtrl+H</PlayBack--ControlInfo-ControlName>     
            </Step>
            <Step DialogName="Untitled - Notepad">
              <Step-ID>12</Step-ID>     
              <PlayBack--ControlInfo-ControlName />     
            </Step>
            <Step DialogName="Untitled - Notepad">
              <Step-ID>13</Step-ID>     
              <PlayBack--ControlInfo-ControlName>Edit</PlayBack--ControlInfo-ControlName>      
            </Step>
            <Step DialogName="Untitled - Notepad">
              <Step-ID>14</Step-ID>      
              <PlayBack--ControlInfo-ControlName>Replace...\tCtrl+H</PlayBack--ControlInfo-ControlName>      
            </Step>
            <Step DialogName="Untitled - Notepad">
              <Step-ID>15</Step-ID>     
              <PlayBack--ControlInfo-ControlName>Cancel</PlayBack--ControlInfo-ControlName>
            </Step>
          </GStep>
          <GStep DialogName="Replace">
             <Step DialogName="Replace">
                <Step-ID>8</Step-ID>     
                <PlayBack--ControlInfo-ControlName />      
             </Step>
             <Step DialogName="Replace">
                <Step-ID>9</Step-ID>      
                <PlayBack--ControlInfo-ControlName>Cancel</PlayBack--ControlInfo-ControlName>     
             </Step>
             <Step DialogName="Replace">
                <Step-ID>10</Step-ID>      
                <PlayBack--ControlInfo-ControlName />     
             </Step>
             <Step DialogName="Replace">
                <Step-ID>16</Step-ID>     
                <PlayBack--ControlInfo-ControlName />     
             </Step>
           </GStep>
         </Process>
       </Document>

The result I am expecting is shown below.

 <?xml version="1.0" encoding="utf-8"?>
       <Document>
         <Meta>
           <GpsFile>notepad_may_30_file</GpsFile>
           <GpsId>36fa4fe8-9691-4a7f-8bc1-9543f6b7d29a</GpsId>
           <ExePath>
              <ExePath1>C:\WINDOWS\SYSTEM32\notepad.exe</ExePath1>
          </ExePath>
        </Meta>
        <Process>
              <GStep DialogName="Untitled - Notepad">
              <Step DialogName="Untitled - Notepad">
                 <Step-ID>3</Step-ID>     
                 <PlayBack--ControlInfo-ControlName />      
              </Step>
              <Step DialogName="Untitled - Notepad">
                   <Step-ID>5</Step-ID>      
                   <PlayBack--ControlInfo-ControlName>Edit</PlayBack--ControlInfo-ControlName>     
              </Step>
             <Step DialogName="Untitled - Notepad">
                  <Step-ID>6</Step-ID>      
                  <PlayBack--ControlInfo-ControlName>Replace...\tCtrl+H</PlayBack--ControlInfo-ControlName>
            </Step>
            <Step DialogName="Untitled - Notepad">
                 <Step-ID>15</Step-ID>     
                 <PlayBack--ControlInfo-ControlName>Cancel</PlayBack--ControlInfo-ControlName>
             </Step>
        </GStep>
       <GStep DialogName="Replace">
             <Step DialogName="Replace">
              <Step-ID>8</Step-ID>     
              <PlayBack--ControlInfo-ControlName />      
            </Step>
            <Step DialogName="Replace">
                <Step-ID>9</Step-ID>      
                <PlayBack--ControlInfo-ControlName>Cancel</PlayBack--ControlInfo-ControlName>     
            </Step>         
         </GStep>
     </Process>
   </Document>

I tried with the below xslt code snippet.

  <xsl:stylesheet version="1.0"
                  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output omit-xml-declaration="yes" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- Index every Step by the string value of its control-name child.
         The key is document-wide: Steps that sit under different GStep
         parents but share a value fall into the same key group. -->
    <xsl:key name="ControlNameInfo" match="Step" use="PlayBack--ControlInfo-ControlName"/>

    <!-- Identity transform: copy every node and attribute unchanged. -->
    <xsl:template match="node()|@*">
      <xsl:copy>
        <xsl:apply-templates select="node()|@*"/>
      </xsl:copy>
    </xsl:template>

    <!-- Suppress any Step that is not the first member of its key group.
         The element name passed to key() must stay on one line; a line
         break inside the name is an XPath syntax error.
         Because the key is not scoped to the parent GStep, a Step is also
         suppressed when an earlier Step in another GStep has the same value. -->
    <xsl:template match="GStep/Step[not(generate-id() = generate-id(key('ControlNameInfo', PlayBack--ControlInfo-ControlName)[1]))]"/>
  </xsl:stylesheet>

  Can anyone help?
  Thanks very much.

Solution

  • In order to keep only the distinct Step nodes within each GStep, you must include the parent GStep in the key. Try:

    XSLT 1.0

    <xsl:stylesheet version="1.0"
                    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
      <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
      <xsl:strip-space elements="*"/>

      <!-- Composite key: the control-name value of a Step joined with the
           generated id of its parent. Including the parent id keeps the
           groups of one GStep separate from those of every other GStep. -->
      <xsl:key name="k1" match="Step" use="concat(PlayBack--ControlInfo-ControlName, '|', generate-id(..))"/>

      <!-- Identity transform: copy every node and attribute unchanged. -->
      <xsl:template match="@*|node()">
        <xsl:copy>
          <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
      </xsl:template>

      <!-- Rebuild each GStep with only the first Step of every key group. -->
      <xsl:template match="GStep">
        <xsl:copy>
          <!-- xsl:copy on an element does not copy its attributes, so they
               are pushed through the identity template explicitly; without
               this the DialogName attribute of GStep would be lost. -->
          <xsl:apply-templates select="@*"/>
          <!-- Muenchian test: a Step is kept only when it is the first node
               that the key returns for its own (value, parent) pair.
               Only Step children are selected here; any other child nodes
               of GStep are not copied. -->
          <xsl:apply-templates select="Step[generate-id() = generate-id(key('k1', concat(PlayBack--ControlInfo-ControlName, '|', generate-id(..)))[1])]"/>
        </xsl:copy>
      </xsl:template>

    </xsl:stylesheet>