Search code examples
java xml parsing sax

Building strings correctly with SAX Parser Java


I'm trying to read an XML file that has an unknown structure. This might be one file:

<S:Envelope xmlns:S="http://anamespace">envelopeStart
    <S:Body>bodyStart
        <ns2:getNextResponse xmlns:ns2="http://anothernamespace">getNextResponseStart
            <nextValue>9</nextValue>
        getNextResponseEnd</ns2:getNextResponse>
    bodyEnd</S:Body>
envelopeEnd</S:Envelope>

This is the handler I'm actually using:

// SAX handler that tries to record the text of every element in a map keyed by
// the element's local name. It uses ONE shared buffer for all elements, which is
// why text read before a child element is lost for the enclosing element.
DefaultHandler handler = new DefaultHandler() {
    // Single text buffer; replaced with a fresh instance on every start tag.
    StringBuilder builder;
    // Collected text, keyed by element local name (a repeated name overwrites the earlier entry).
    Map<String, String> values = new HashMap<String, String>();
    
    // Called for each opening tag: discards whatever the previous buffer held,
    // including text that belonged to the still-open parent element.
    @Override
    public void startElement(String uri, String localName, String qName,
                             Attributes attributes) throws SAXException {
        builder = new StringBuilder();
    }

    // Appends the reported slice ch[start .. start+length) to the current buffer.
    @Override
    public void characters(char ch[], int start, int length) throws SAXException {
        builder.append(new String(ch, start, length));
    }

    // Called for each closing tag: stores the buffer under the local name, then
    // empties the same buffer so the parent's trailing text starts from scratch.
    @Override
    public void endElement(String uti, String localName, String qName) throws SAXException {
        values.put(localName, builder.toString());
        builder.setLength(0);
    }
}

The problem I'm facing is that, because I instantiate a new builder for every tag that is parsed, I lose the leading ("Start") text of every enclosing tag that was read before its child element began (assume the characters method returns all of an element's characters in a single call):

new Builder for the Envelope tag
reading characters: envelopeStart
new Builder for the Body tag
reading characters: bodyStart
...
new Builder for the nextValue tag <- this is the last reference to the builder that I have to use from now on
reading characters: 9
endElement: saving to Map ('nextValue', '9') and resetting length of the last builder instantiated 
reading characters: getNextResponseEnd
endElement: saving to Map ('getNextResponse', 'getNextResponseEnd') and resetting length of the last builder instantiated
...

In this case the values HashMap will have these values:

nextValue=9
getNextResponse=getNextResponseEnd (missing getNextResponseStart)
Body=bodyEnd (missing bodyStart)
Envelope=envelopeEnd (missing envelopeStart)

Is there a way to save both the leading (start) and trailing (end) text of each tag in the map?


Solution

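  • Just keep a stack of StringBuilders: push a new builder in startElement, append to the builder on top of the stack in characters, and pop it in endElement. Once a child's builder has been popped, the parent's builder is on top again, so the text that follows the child is appended to the parent's earlier text instead of replacing it: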

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.Stack;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.parsers.SAXParser;
    import javax.xml.parsers.SAXParserFactory;
    import org.xml.sax.Attributes;
    import org.xml.sax.InputSource;
    import org.xml.sax.SAXException;
    import org.xml.sax.XMLReader;
    import org.xml.sax.helpers.DefaultHandler;
    
    /**
     * Demonstrates how to collect the complete text of every element with SAX when
     * elements have mixed content (text both before and after child elements).
     */
    public class Example {
        /**
         * Parses the sample document from the question and collects its element text.
         *
         * @param args ignored
         * @throws ParserConfigurationException if a SAX parser cannot be created or configured
         * @throws SAXException if the document cannot be parsed
         * @throws IOException if reading the document fails
         */
        public static void main(String... args) throws ParserConfigurationException, SAXException, IOException {
            String xml = "<S:Envelope xmlns:S=\"http://anamespace\">envelopeStart\n" +
                         "    <S:Body>bodyStart\n" +
                         "        <ns2:getNextResponse xmlns:ns2=\"http://anothernamespace\">getNextResponseStart\n" +
                         "            <nextValue>9</nextValue>\n" +
                         "        getNextResponseEnd</ns2:getNextResponse>\n" +
                         "    bodyEnd</S:Body>\n" +
                         "envelopeEnd</S:Envelope>";

            // Envelope, Body and getNextResponse each keep their leading AND trailing text here.
            Map<String, String> values = collectElementText(xml);
        }

        /**
         * Parses {@code xml} with a namespace-aware SAX parser and returns the direct text
         * content of every element, keyed by the element's local name.
         *
         * <p>Text that appears before and after a child element is concatenated for the
         * parent; the child's own text is not included in the parent's entry. Whitespace is
         * kept as reported by the parser. If several elements share a local name, the one
         * that closes last wins.
         *
         * @param xml the XML document to parse
         * @return a mutable map from element local name to that element's direct text
         * @throws ParserConfigurationException if a SAX parser cannot be created or configured
         * @throws SAXException if the document cannot be parsed, or contains a DOCTYPE declaration
         * @throws IOException if reading the document fails
         */
        static Map<String, String> collectElementText(String xml)
                throws ParserConfigurationException, SAXException, IOException {
            Map<String, String> values = new HashMap<>();

            DefaultHandler handler = new DefaultHandler() {
                // One builder per currently open element; the innermost element is on top.
                private final Deque<StringBuilder> builders = new ArrayDeque<>();

                @Override
                public void startElement(String uri, String localName, String qName,
                                         Attributes attributes) throws SAXException {
                    builders.push(new StringBuilder());
                }

                @Override
                public void characters(char[] ch, int start, int length) throws SAXException {
                    // Append the slice directly: no temporary String, and it keeps working
                    // when the parser delivers an element's text in several chunks.
                    builders.peek().append(ch, start, length);
                }

                @Override
                public void endElement(String uri, String localName, String qName) throws SAXException {
                    // Popping re-exposes the parent's builder, so the text that follows this
                    // element is appended to what the parent had already collected.
                    values.put(localName, builders.pop().toString());
                }
            };

            SAXParserFactory spf = SAXParserFactory.newInstance();
            spf.setNamespaceAware(true);
            // The input may come from anywhere: refuse DOCTYPE declarations to rule out XXE.
            spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            SAXParser saxParser = spf.newSAXParser();
            XMLReader xmlReader = saxParser.getXMLReader();
            xmlReader.setContentHandler(handler);
            xmlReader.parse(new InputSource(new StringReader(xml)));
            return values;
        }
    }