Search code examples
javaxmlsax

Skipping nodes with sax


Is it possible to skip nodes when parsing with SAX, and if so, how? Does the skippedEntity method have anything to do with it?

Consider this XML :

<?xml version="1.0"?>

<nutrition>

<daily-values>
    <total-fat units="g">65</total-fat>
    <saturated-fat units="g">20</saturated-fat>
    <cholesterol units="mg">300</cholesterol>
    <sodium units="mg">2400</sodium>
    <carb units="g">300</carb>
    <fiber units="g">25</fiber>
    <protein units="g">50</protein>
</daily-values>

</nutrition>

I want to skip the "sodium" element.


Solution

  • You could do something like the following:

    import javax.xml.parsers.SAXParser;
    import javax.xml.parsers.SAXParserFactory;
    import org.xml.sax.XMLReader;
    
    /**
     * Command-line entry point: parses {@code input.xml} with a SAX
     * {@link XMLReader} whose events are delivered to a {@code MyContentHandler}.
     */
    public class Demo {

        /**
         * Obtains an {@link XMLReader} from the default {@link SAXParserFactory},
         * registers a {@code MyContentHandler} on it and parses {@code input.xml}.
         *
         * @param args command-line arguments (not used)
         * @throws Exception if the parser cannot be created or parsing fails
         */
        public static void main(String[] args) throws Exception {
            XMLReader reader = SAXParserFactory.newInstance()
                    .newSAXParser()
                    .getXMLReader();
            // The handler receives the reader so it can replace itself as the
            // registered content handler while parsing is in progress.
            MyContentHandler handler = new MyContentHandler(reader);
            reader.setContentHandler(handler);
            reader.parse("input.xml");
        }
    }
    

    MyContentHandler

    This class is responsible for processing your XML document. When you hit a node you want to ignore, you can swap in the IgnoringContentHandler, which will swallow all events for that node.

    import org.xml.sax.Attributes;
    import org.xml.sax.SAXException;
    import org.xml.sax.XMLReader;
    import org.xml.sax.helpers.DefaultHandler;
    
    /**
     * Content handler that prints the SAX events it receives to standard output.
     *
     * <p>When a {@code sodium} element starts, this handler does not print it;
     * instead it registers an {@code IgnoringContentHandler} on the reader, so
     * the following events go to that handler rather than to this one.
     */
    public class MyContentHandler extends DefaultHandler {

        /** Qualified name of the element that is handed off instead of printed. */
        private static final String SKIPPED_ELEMENT = "sodium";

        /** Reader on which the replacement content handler is registered. */
        private final XMLReader reader;

        /**
         * @param xmlReader the reader that is delivering events to this handler
         */
        public MyContentHandler(XMLReader xmlReader) {
            reader = xmlReader;
        }

        /**
         * Prints {@code START <qName>} for every element except the skipped one;
         * for the skipped one, installs an {@code IgnoringContentHandler} that is
         * given this handler as the one to return control to.
         */
        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes atts) throws SAXException {
            if (!SKIPPED_ELEMENT.equals(qName)) {
                System.out.println("START " + qName);
                return;
            }
            IgnoringContentHandler skipper = new IgnoringContentHandler(reader, this);
            reader.setContentHandler(skipper);
        }

        /** Prints {@code END <qName>} for each end-element event received. */
        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            System.out.println("END " + qName);
        }

        /** Prints the reported slice of the character buffer on its own line. */
        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            String text = new String(ch, start, length);
            System.out.println(text);
        }

    }
    

    IgnoringContentHandler

    When the IgnoringContentHandler is done swallowing events it passes control back to your main ContentHandler.

    import org.xml.sax.Attributes;
    import org.xml.sax.ContentHandler;
    import org.xml.sax.SAXException;
    import org.xml.sax.XMLReader;
    import org.xml.sax.helpers.DefaultHandler;
    
    /**
     * Content handler that discards events for one element subtree and then
     * re-registers a previously supplied handler on the reader.
     *
     * <p>The handler counts open elements, starting at one for the element
     * whose start event caused it to be installed. Each nested start element
     * raises the count and each end element lowers it; when the count reaches
     * zero the original handler is put back. All other callbacks keep the
     * inherited {@link DefaultHandler} behaviour.
     */
    public class IgnoringContentHandler extends DefaultHandler {

        /** Reader whose content handler is switched back at the end. */
        private final XMLReader reader;

        /** Handler that is re-registered once the subtree is closed. */
        private final ContentHandler resumeHandler;

        /** Elements currently open; starts at 1 for the element being skipped. */
        private int openElements = 1;

        /**
         * @param xmlReader      the reader delivering events to this handler
         * @param contentHandler the handler to register again when skipping ends
         */
        public IgnoringContentHandler(XMLReader xmlReader, ContentHandler contentHandler) {
            reader = xmlReader;
            resumeHandler = contentHandler;
        }

        /** Records one more open element; the event itself is discarded. */
        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes atts) throws SAXException {
            openElements += 1;
        }

        /**
         * Records one fewer open element and, once none remain open, registers
         * the original handler on the reader again.
         */
        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            openElements -= 1;
            if (openElements != 0) {
                return;
            }
            reader.setContentHandler(resumeHandler);
        }

    }