Search code examples
java xml sax saxparser

Get parent element on XSD validation error


I have the following xml with unit & measure child elements.

<Depth>
    <measure>1.00</measure>
    <unit>in</unit>
</Depth>
<Width>
    <measure>1.00</measure>
    <unit>in</unit>
</Width>
<vendorPackHeight>
    <measure>1.00</measure>
    <unit>in</unit>
</vendorPackHeight>
<Weight>
    <measure>7.00</measure>
    <unit>LBS</unit> //invalid expected value is lb
</Weight> 

When XSD validation fails for either the unit or the measure child element (for example, with cvc-enumeration-valid when measure is not one of a set of enum values, or with cvc-datatype-valid.1.2.1 when the data type of unit does not match), how can I get the parent element? In the above XML it would be Weight.

In the SAXParseException I get the line number where the error occurs. Is it possible to get the element from the line number and then get its parent?


Solution

  • I don't think there is a standard way of doing this in the Java API. However, some libraries do allow you to sneak a peek at which element the validator is currently on. For instance, the Apache Xerces implementation supports getting the current node via

    getProperty("http://apache.org/xml/properties/dom/current-element-node")

    Check out the documentation for that property on their website: https://xerces.apache.org/xerces2-j/properties.html#dom.current-element-node


    A version of the Xerces library is provided by your JDK by default, but it can also be imported into your project as a third-party library. I would recommend adding it explicitly if you MUST have it for your application to run correctly (the sample below imports org.apache.xerces classes directly, so it needs the library on the classpath). Here's a bit of sample code which validates an XML document against an XSD and gets the current node.

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    
    import javax.xml.XMLConstants;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.Source;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamSource;
    import javax.xml.validation.Schema;
    import javax.xml.validation.SchemaFactory;
    import javax.xml.validation.Validator;
    
    import org.apache.xerces.impl.Constants;
    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.ErrorHandler;
    import org.xml.sax.SAXException;
    import org.xml.sax.SAXNotRecognizedException;
    import org.xml.sax.SAXNotSupportedException;
    import org.xml.sax.SAXParseException;
    
    /**
     * Demonstrates how to find out which DOM element a {@link Validator} was processing when it
     * reported an XSD validation problem, by reading the Xerces-specific
     * {@code http://apache.org/xml/properties/dom/current-element-node} property from inside an
     * {@link ErrorHandler}.
     */
    public class XSDTest {

        /**
         * Validates a deliberately invalid in-memory XML document against an in-memory schema and
         * prints, for every reported problem, the node being validated and the validator's message.
         *
         * @param args unused
         * @throws SAXException if the schema or document cannot be parsed, or a validator property
         *         cannot be read
         * @throws IOException if reading the in-memory streams fails
         * @throws ParserConfigurationException if a document builder cannot be created
         */
        public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException {

            // our XSD, which defines 1 node  <TheNode> which must have decimal text content.
            // Encoded explicitly as UTF-8 so the bytes always agree with the encoding named in the
            // XML declaration, regardless of the JVM's default charset.
            byte[] schemaData = ("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
                    + "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\">"
                        + "<xs:simpleType name=\"theNodeType\">"
                            + "<xs:restriction base=\"xs:decimal\"/>"
                        + "</xs:simpleType>"
                        + "<xs:element name=\"TheNode\" type=\"theNodeType\"/>"
                    + "</xs:schema>").getBytes(StandardCharsets.UTF_8);
            // our invalid xml; it has no XML declaration, so a parser treats it as UTF-8
            byte[] xmlData = "<TheNode>123NotADecimal</TheNode>".getBytes(StandardCharsets.UTF_8);

            // parse schema
            SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
            Source schemaSource = new StreamSource(new ByteArrayInputStream(schemaData));
            Schema schema = schemaFactory.newSchema(schemaSource);

            // build our document, must use document builder to enable xerces parser properties for DOM
            // Also must be a xerces implementation of the DBF, should be enabled by default in a standard java project but just to be verbose about it
            // pass in the full name of the DBF impl
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(org.apache.xerces.jaxp.DocumentBuilderFactoryImpl.class.getName(), XSDTest.class.getClassLoader());
            dbf.setNamespaceAware(true);
            Document doc = dbf.newDocumentBuilder().parse(new ByteArrayInputStream(xmlData));

            // configure our validator and validate the document; the handler keeps a reference to
            // the validator so it can query the current node while an error is being reported.
            Validator validator = schema.newValidator();
            validator.setErrorHandler(new MyErrorHandler(validator));
            validator.validate(new DOMSource(doc.getDocumentElement()));
        }

        /**
         * Error handler that prints the node currently being validated together with the
         * validator's message. It never rethrows, so validation continues after each report.
         */
        private static class MyErrorHandler implements ErrorHandler {
            private final Validator xsdValidator;

            /**
             * @param xsdValidator the validator this handler is registered on; queried for its
             *        current element node whenever a problem is reported
             */
            public MyErrorHandler(Validator xsdValidator) {
                this.xsdValidator = xsdValidator;
            }

            @Override
            public void warning(SAXParseException exception) throws SAXException {
                report("Warning", exception);
            }

            @Override
            public void error(SAXParseException exception) throws SAXException {
                report("Error", exception);
            }

            @Override
            public void fatalError(SAXParseException exception) throws SAXException {
                report("Fatal", exception);
            }

            /**
             * Prints "&lt;severity&gt; on node: &lt;node&gt;" followed by the exception's localized message.
             *
             * @param severity label placed at the start of the first printed line
             * @param exception the problem reported by the validator
             * @throws SAXException if the current-node property cannot be read from the validator
             */
            private void report(String severity, SAXParseException exception) throws SAXException {
                System.out.println(severity + " on node: " + getCurrentNode());
                System.out.println(exception.getLocalizedMessage());
            }

            /**
             * Reads the validator's current element node.
             *
             * @return the value of the validator's current-element-node property, cast to {@link Node}
             * @throws SAXNotRecognizedException if the validator does not recognize the property
             * @throws SAXNotSupportedException if the validator recognizes but cannot return the property
             */
            private Node getCurrentNode() throws SAXNotRecognizedException, SAXNotSupportedException {
                // get prop "http://apache.org/xml/properties/dom/current-element-node"
                // see https://xerces.apache.org/xerces2-j/properties.html#dom.current-element-node
                return (Node) xsdValidator.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.CURRENT_ELEMENT_NODE_PROPERTY);
            }
        }

    }
    

    Output:

    Error on node: [TheNode: null]
    cvc-datatype-valid.1.2.1: '123NotADecimal' is not a valid value for 'decimal'.
    Error on node: [TheNode: null]
    cvc-type.3.1.3: The value '123NotADecimal' of element 'TheNode' is not valid.