Search code examples
xslt

XSLT - Comparing 2 documents for matching parent and child elements


If I have the following two XML docs:

untranslated file:

 <body>
     <trans-unit id="Id a" maxwidth="240" size-unit="char">
                <source>original a</source>
     </trans-unit>
     <trans-unit id="Id b" maxwidth="240" size-unit="char">
                <source>original b</source>
     </trans-unit>
     <trans-unit id="Id c" maxwidth="240" size-unit="char">
                <source>original c</source>
     </trans-unit>
 </body>

translated file:

 <body>
     <trans-unit id="Id a" maxwidth="240" size-unit="char">
                <source>original a</source>
     </trans-unit>
     <trans-unit id="Id b" maxwidth="240" size-unit="char">
                <source>translated b</source>
     </trans-unit>
 </body>

I want to output only the items whose source differs between the two files. In this case the trans-unit 'Id a' is untranslated (its source is the same in both docs), so I want to exclude it. I only want to see the trans-unit 'Id b' from the second (translated) file, because the content of its source element is different.

Also, I cannot guarantee that every trans-unit is present in both files; 'Id c', for example, exists only in the untranslated file.

Final output document:

 <body>
     <trans-unit id="Id b" maxwidth="240" size-unit="char">
                <source>translated b</source>
     </trans-unit>
 </body>

I don't have much experience with XSL. I am assuming this will involve holding one of the documents' contents in a variable and performing tests against it, but I could be incorrect.

<?xml version="1.0" encoding="utf-8"?>
<!--
  Run this stylesheet against the UNTRANSLATED file.

  It loads translatedfile.xml and writes a <body> holding a copy of every
  trans-unit from the translated file that
    * has the same @id as a trans-unit in the input, and
    * has a <source> whose text differs from the input's <source>.

  A trans-unit that exists in only one of the two files produces no output.
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" encoding="utf-8" indent="yes"/>

<!-- The relative URI is resolved against the location of this stylesheet. -->
<xsl:variable name="mytranslatedfile" select="document('translatedfile.xml')" />

<!-- All trans-unit elements of the translated file, at any depth. -->
<xsl:variable name="translated" select="$mytranslatedfile//trans-unit" />

<!-- Recreate the <body> wrapper and visit each untranslated trans-unit. -->
<xsl:template match="body">
     <xsl:copy>
          <xsl:apply-templates select="trans-unit"/>
     </xsl:copy>
</xsl:template>

<!--
  For one untranslated trans-unit: find its counterpart(s) by @id in the
  translated file and copy those whose source text is not equal to ours.
  The current node's values are captured in variables first, because inside
  the predicates the context node is the translated trans-unit.
-->
<xsl:template match="trans-unit">
     <xsl:variable name="id" select="@id"/>
     <xsl:variable name="original" select="source"/>
     <xsl:copy-of select="$translated[@id = $id][not(source = $original)]"/>
</xsl:template>

</xsl:stylesheet>

Solution

  • With XSLT 3 and keys (the second document is inlined here to keep the example self-contained, but it could instead be loaded with <xsl:param name="translation" select="doc('translations.xml')"/>):

    <!--
      Run against the untranslated file. Emits a <body> containing every
      trans-unit of $translation whose @id occurs in the input but for which
      the input has no trans-unit with the same (@id, source) pair.
    -->
    <xsl:stylesheet version="3.0"
      xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      xmlns:xs="http://www.w3.org/2001/XMLSchema"
      exclude-result-prefixes="#all">

      <!-- Translated document; inlined here as the parameter's default value. -->
      <xsl:param name="translation">
        <body>
          <trans-unit id="Id a" maxwidth="240" size-unit="char">
            <source>original a</source>
          </trans-unit>
          <trans-unit id="Id b" maxwidth="240" size-unit="char">
            <source>translated b</source>
          </trans-unit>
        </body>
      </xsl:param>

      <!-- Root of the primary (untranslated) input, kept for key lookups. -->
      <xsl:variable name="main" select="/"/>

      <!-- trans-unit by @id alone. -->
      <xsl:key name="translated" match="trans-unit" use="@id"/>

      <!-- trans-unit by the pair (@id, source). -->
      <xsl:key name="source" match="trans-unit" use="@id, source" composite="yes"/>

      <xsl:template match="body">
        <!-- Counterparts in $translation of the trans-units found in this body. -->
        <xsl:variable name="counterparts"
          select="trans-unit/key('translated', @id, $translation)"/>
        <xsl:copy>
          <!-- Keep only those with no identical (@id, source) pair in the input. -->
          <xsl:copy-of
            select="$counterparts[not(key('source', (@id, source), $main))]"/>
        </xsl:copy>
      </xsl:template>

    </xsl:stylesheet>
    

    Example fiddle is here.