Hi there – I'm new to XSLT:
I have two XML TEI files with some matching elements. I would like to replace every <w> element in the first file TEI_test.xml
with the corresponding <w>
element in the second file lookup.xml
, such that the attribute @lemma
in the first file, matches the attribute @lemma
in the second file (lookup.xml
). All attributes and children of the lookup <w> should also be copied over, except for the actual text(), which should be kept from TEI_test.xml
. If there is no match, then the original TEI_test.xml
element should be preserved.
This is the TEI_test.xml
file:
<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader><fileDesc>
<titleStmt>
<title></title>
</titleStmt>
<publicationStmt><publisher></publisher></publicationStmt>
<sourceDesc><p></p></sourceDesc>
</fileDesc>
</teiHeader><text><body>
<p xml:lang="arn" n="3">
<w xml:lang="" lemma="ta">ta</w>
<w xml:lang="" lemma="ella">ella</w>
<w xml:lang="" lemma="rüpü">rùpù</w>
<w xml:lang="" lemma="rüpüwe">rùpùwe</w>
</p>
</body>
</text>
</TEI>
This is the lookup table: lookup.xml
<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader><fileDesc>
<titleStmt>
<title></title>
</titleStmt>
<publicationStmt><publisher></publisher></publicationStmt>
<sourceDesc><p></p></sourceDesc>
</fileDesc>
</teiHeader><text><body><p>
<w xml:lang="arn" lemma="mew" pos="P"><m baseForm="mew" type="root" corresp="P">meu</m></w>
<w xml:lang="arn" lemma="ta" pos="DA"><m baseForm="ta" type="root" corresp="DA">ta</m></w>
<w xml:lang="arn" lemma="rüpü" pos="N" corresp="path/road"><m baseForm="rüpü" type="root" corresp="path/road">rüpü</m></w>
<w xml:lang="arn" lemma="rüpüwe" pos="N" corresp="place of path/road"><m baseForm="rüpü" type="root" corresp="path/road">rüpü</m><m baseForm="we" type="instrumental">we</m></w>
</p>
</body></text></TEI>
The XSLT I came up with is as follows:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:strip-space elements="*"/>
<xsl:param name="path-to-lookup" select="'lookup1.xml'" />
<xsl:param name="path-to-orig" select="'TEI_test.xml.xml'" />
<!-- Default rule: shallow-copy the current node, then process its attributes and children the same way. -->
<xsl:template match="node() | @*">
  <xsl:copy>
    <xsl:apply-templates select="node() | @*"/>
  </xsl:copy>
</xsl:template>
<!--
  Handles every w element of the input document.
  Two independent xsl:choose blocks run one after the other, so a single
  input w can produce more than one output element.
-->
<xsl:template match="tei:w">
<!--
  First choice: when the test succeeds, copy the lookup w whose @lemma equals
  the current @lemma (including its attributes and children); otherwise copy
  the current w unchanged.
  NOTE(review): the test compares @lemma with the string value (text content)
  of the lookup w elements that carry a @lemma, not with their @lemma
  attribute. The selection inside xsl:copy-of is the one that compares the
  two @lemma attributes.
-->
<xsl:choose>
<xsl:when test="@lemma =document($path-to-lookup)//tei:w[@lemma]">
<xsl:copy-of select="document($path-to-lookup)//tei:w[@lemma=current()/@lemma]">
</xsl:copy-of>
</xsl:when>
<xsl:otherwise>
<xsl:copy-of select="."/>
</xsl:otherwise></xsl:choose>
<!--
  Second choice: under the same test, copy every w from the document at
  $path-to-orig whose text node equals the text of the current w.
  xsl:copy-of writes these as complete elements after whatever the first
  choice emitted; it does not put the text inside the copied lookup element.
  Nothing is emitted when the test fails.
-->
<xsl:choose>
<xsl:when test="@lemma =document($path-to-lookup)//tei:w[@lemma]">
<xsl:copy-of select="document($path-to-orig)//tei:w[text()=current()/text()]"/>
</xsl:when>
<xsl:otherwise>
</xsl:otherwise></xsl:choose>
</xsl:template>
</xsl:stylesheet>
While this manages to copy all the <w>
nodes where there is a w[@lemma]
match, it does not produce the expected results when it comes to the preservation of the w/text()
, which is meant to be addressed by the second <xsl:choose>
series. Here is what I get:
<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader>
<fileDesc>
<titleStmt>
<title/>
</titleStmt>
<publicationStmt>
<publisher/>
</publicationStmt>
<sourceDesc>
<p/>
</sourceDesc>
</fileDesc>
</teiHeader>
<text>
<body>
<p xml:lang="arn" n="3">
<w xml:lang="arn" lemma="ta" pos="DA">
<m baseForm="ta" type="root" corresp="DA">ta</m>
</w>
<w xml:lang="" lemma="ta">ta</w>
<w xml:lang="" lemma="ella">ella</w>
<w xml:lang="arn" lemma="rüpü" pos="N" corresp="path/road">
<m baseForm="rüpü" type="root" corresp="path/road">rüpü</m>
</w>
<w xml:lang="" lemma="rüpü">rùpù</w>
<w xml:lang="arn" lemma="rüpüwe" pos="N" corresp="place of path/road">
<m baseForm="rüpü" type="root" corresp="path/road">rüpü</m>
<m baseForm="we" type="instrumental">we</m>
</w>
</p>
</body>
</text>
</TEI>
What I actually want to obtain is this:
<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader>
<fileDesc>
<titleStmt>
<title/>
</titleStmt>
<publicationStmt>
<publisher/>
</publicationStmt>
<sourceDesc>
<p/>
</sourceDesc>
</fileDesc>
</teiHeader>
<text>
<body>
<p xml:lang="arn" n="3">
<w xml:lang="arn" lemma="ta" pos="DA">
<m baseForm="ta" type="root" corresp="DA">ta</m>
</w>
<w xml:lang="" lemma="ella">ella</w>
<w xml:lang="arn" lemma="rüpü" pos="N" corresp="path/road">
<m baseForm="rüpü" type="root" corresp="path/road">rùpù</m>
</w>
<w xml:lang="arn"
lemma="rüpüwe" pos="N" corresp="place of path/road">
<m baseForm="rüpü" type="root" corresp="path/road">rùpùwe</m>
<m baseForm="we" type="instrumental">rùpùwe</m>
</w>
</p>
</body>
</text>
</TEI>
Any ideas?
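I would use a key. In XSLT 2/3 you can do it elegantly with the third argument of the key
function, modes and a tunnel parameter (the xsl:mode declarations used below require XSLT 3; in XSLT 2 you would write out the identity template for each mode instead):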
<!--
  Replaces every input element whose @lemma has an entry in the lookup
  document with a copy of that entry (attributes and child elements), while
  the text inside the copy is replaced by the string value of the input
  element. Elements without a matching entry are copied unchanged.
  If the lookup document holds several entries with the same @lemma, every
  one of them is emitted.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xpath-default-namespace="http://www.tei-c.org/ns/1.0"
                exclude-result-prefixes="#all"
                version="3.0">

  <!-- Lookup document; pass a different document node to override it. -->
  <xsl:param name="lookup-doc" select="document('lookup.xml')"/>

  <!-- Indexes every element carrying @lemma by the value of that attribute. -->
  <xsl:key name="ref" match="*[@lemma]" use="@lemma"/>

  <!-- In both modes, nodes without an explicit rule are shallow-copied. -->
  <xsl:mode on-no-match="shallow-copy"/>
  <xsl:mode name="ref-copy" on-no-match="shallow-copy"/>

  <!--
    Input element that has a lookup entry: emit the entry instead and hand
    the original string value down as a tunnel parameter.
  -->
  <xsl:template match="*[key('ref', @lemma, $lookup-doc)]">
    <xsl:apply-templates select="key('ref', @lemma, $lookup-doc)" mode="ref-copy">
      <xsl:with-param name="text" select="string()" tunnel="yes"/>
    </xsl:apply-templates>
  </xsl:template>

  <!--
    Text inside a copied lookup entry is replaced by the original word form.
    The predicate skips whitespace-only text nodes, so indentation in a
    pretty-printed lookup file is copied as is instead of being replaced by
    the word form as well.
  -->
  <xsl:template match="text()[normalize-space()]" mode="ref-copy">
    <xsl:param name="text" tunnel="yes"/>
    <xsl:value-of select="$text"/>
  </xsl:template>

</xsl:stylesheet>
https://xsltfiddle.liberty-development.net/3NSSEuR
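With XSLT 1 you need to spell out the identity transformation for the two modes and have the second mode pass the parameter on explicitly instead of relying on tunnel parameters. Because the XSLT 1 key function has no third argument and only searches the document that contains the context node, and because a match pattern may not reference a variable, you will also need to check for the existence of the lookup entry from inside the template, changing context with for-each select="$lookup-doc"
. Oh, and you have to declare a prefix for the namespace, since XSLT 1 has no xpath-default-namespace: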
<?xml version="1.0" encoding="UTF-8"?>
<!--
  XSLT 1.0 version: replaces every TEI element whose @lemma has an entry in
  the lookup document with a copy of that entry (attributes and child
  elements), while the text inside the copy is replaced by the string value
  of the input element. Elements without a matching entry are copied
  unchanged.
-->
<xsl:stylesheet
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:tei="http://www.tei-c.org/ns/1.0"
    version="1.0">

  <!-- Lookup document; pass a different document node to override it. -->
  <xsl:param name="lookup-doc" select="document('lookup.xml')"/>

  <!-- Indexes every TEI element carrying @lemma by the value of that attribute. -->
  <xsl:key name="ref" match="tei:*[@lemma]" use="@lemma"/>

  <!-- Identity rule for the default mode; also callable by name. -->
  <xsl:template match="@* | node()" name="identity">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!--
    Identity rule for the ref-copy mode. XSLT 1.0 has no tunnel parameters,
    so the original word form is passed on explicitly at every level.
  -->
  <xsl:template match="@* | node()" mode="ref-copy">
    <xsl:param name="text"/>
    <xsl:copy>
      <xsl:apply-templates select="@* | node()" mode="ref-copy">
        <xsl:with-param name="text" select="$text"/>
      </xsl:apply-templates>
    </xsl:copy>
  </xsl:template>

  <!--
    Input element with @lemma: emit the matching lookup entry if there is
    one, otherwise copy the element unchanged.
  -->
  <xsl:template match="tei:*[@lemma]">
    <xsl:variable name="this" select="."/>
    <!-- key() only searches the document of the context node, so switch to the lookup document first. -->
    <xsl:for-each select="$lookup-doc">
      <xsl:variable name="ref-el" select="key('ref', $this/@lemma)"/>
      <xsl:choose>
        <xsl:when test="$ref-el">
          <xsl:apply-templates select="$ref-el" mode="ref-copy">
            <xsl:with-param name="text" select="string($this)"/>
          </xsl:apply-templates>
        </xsl:when>
        <xsl:otherwise>
          <!-- Switch the context back to the input element before copying it. -->
          <xsl:for-each select="$this">
            <xsl:call-template name="identity"/>
          </xsl:for-each>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:for-each>
  </xsl:template>

  <!--
    Text inside a copied lookup entry is replaced by the original word form.
    The predicate does two things: it raises the default priority of this
    rule to 0.5, so it no longer ties with the ref-copy identity rule above
    (a plain text() pattern has priority -0.5, the same as @* | node()), and
    it leaves whitespace-only text nodes of an indented lookup file to the
    identity rule instead of replacing them with the word form.
  -->
  <xsl:template match="text()[normalize-space()]" mode="ref-copy">
    <xsl:param name="text"/>
    <xsl:value-of select="$text"/>
  </xsl:template>

</xsl:stylesheet>