Search code examples
xsltfilterrecords

XSLT To filter out records with letters


We have a requirement to filter records with characters in numeric fields and report them separately. I did come across the following question which has been answered - XPATH To filter out records with letters

However is there a way to mark these records with a flag or collect them in a variable as we need to report these records as invalid records. If we delete them completely the problem is that we do not have a clue on which of them were invalid.

Please suggest.

Thank You!

Input:

<?xml version="1.0" encoding="UTF-8"?>
<payload>
    <records>
        <record>
            <number>123</number>
        </record>
        <record>
            <number>456</number>
        </record> 
        <record>
            <number>78A</number>
        </record> 
    </records>
</payload>

Output:

<?xml version="1.0" encoding="UTF-8"?>
<payload>
    <records>
        <record>
            <number>123</number>
        </record>
        <record>
            <number>456</number>
        </record> 
    </records>
</payload>

XSLT solution from the link above:

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <xsl:template match="node()|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
 </xsl:template>

 <xsl:template match="record[translate(number, '0123456789', '')]"/>
</xsl:stylesheet>

Solution

  • After the match, output the original element with whatever "flag" you want (attribute, comment, processing instruction, etc.).

    Example:

    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
        <xsl:output indent="yes"/>
        <xsl:strip-space elements="*"/>
    
        <xsl:template match="@*|node()">
            <xsl:copy>
                <xsl:apply-templates select="@*|node()"/>
            </xsl:copy>
        </xsl:template>
    
        <xsl:template match="record[string(number(number))='NaN']">
            <record invalid="true">
                <xsl:apply-templates select="@*|node()"/>
            </record>
        </xsl:template>
    
    </xsl:stylesheet>
    

    Output

    <payload>
       <records>
          <record>
             <number>123</number>
          </record>
          <record>
             <number>456</number>
          </record>
          <record invalid="true">
             <number>78A</number>
          </record>
       </records>
    </payload>
    

    You can still use your original match if you'd like.


    Edit to handle multiple number (fields) and identify the specific fields (columns) at the record level.

    Modified XML input example:

    <payload>
        <records>
            <record>
                <number>123</number>
            </record>
            <record>
                <number>456</number>
            </record> 
            <record>
                <number>321</number>
                <number>78A</number>
                <number>654</number>
                <number>abc</number>
            </record> 
        </records>
    </payload>
    

    Updated XSLT 1.0

    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
        <xsl:output indent="yes"/>
        <xsl:strip-space elements="*"/>
    
        <xsl:template match="@*|node()">
            <xsl:copy>
                <xsl:apply-templates select="@*|node()"/>
            </xsl:copy>
        </xsl:template>
    
        <xsl:template match="record">
            <xsl:variable name="invalidCols">
                <xsl:apply-templates select="*" mode="invalid"/>
            </xsl:variable>
            <record>
                <xsl:if test="string($invalidCols)">
                    <xsl:attribute name="invalidCols">
                        <xsl:value-of select="normalize-space($invalidCols)"/>
                    </xsl:attribute>
                </xsl:if>
                <xsl:apply-templates select="@*|node()"/>
            </record>
        </xsl:template>
    
        <xsl:template match="number[string(number(.))='NaN']" mode="invalid">
            <xsl:number/>
            <xsl:text> </xsl:text>
        </xsl:template>
    
        <xsl:template match="*" mode="invalid"/>
    
    </xsl:stylesheet>
    

    Output

    <payload>
       <records>
          <record>
             <number>123</number>
          </record>
          <record>
             <number>456</number>
          </record>
          <record invalidCols="2 4">
             <number>321</number>
             <number>78A</number>
             <number>654</number>
             <number>abc</number>
             <number>123456</number>
          </record>
       </records>
    </payload>