Search code examples
java xml sax

Better way to parse xml


I've been parsing XML like this for years, and I have to admit that when the number of different elements grows, I find it tedious and exhausting. Here is what I mean, using a sample dummy XML document:

<?xml version="1.0"?>
<Order>
    <Date>2003/07/04</Date>
    <CustomerId>123</CustomerId>
    <CustomerName>Acme Alpha</CustomerName>
    <Item>
        <ItemId> 987</ItemId>
        <ItemName>Coupler</ItemName>
        <Quantity>5</Quantity>
    </Item>
    <Item>
        <ItemId>654</ItemId>
        <ItemName>Connector</ItemName>
        <Quantity unit="12">3</Quantity>
    </Item>
    <Item>
        <ItemId>579</ItemId>
        <ItemName>Clasp</ItemName>
        <Quantity>1</Quantity>
    </Item>
</Order>

This is the relevant part (using SAX):

/**
 * SAX handler that reads the sample order document and populates an
 * {@code Order} with its date and customer id.
 *
 * <p>Each element of interest is tracked with a boolean flag that is raised in
 * {@link #startElement} and lowered again in {@link #endElement}. Element text
 * is buffered in between, because a SAX parser is allowed to deliver the text
 * of a single element through several {@link #characters} calls.
 */
public class SaxParser extends DefaultHandler {

    boolean isItem = false;
    // Declared but never set or read by this handler.
    boolean isOrder = false;
    boolean isDate = false;
    boolean isCustomerId = false;
    private Order order;
    private Item item;

    /** Text gathered so far for the element whose flag is currently raised. */
    private final StringBuilder text = new StringBuilder();

    /**
     * Raises the flag that matches the opened element and clears the text buffer
     * for elements whose content is read.
     *
     * @param namespaceURI namespace URI of the element (not used)
     * @param localName    local name; empty when the parser is not namespace-aware
     * @param qName        qualified name, used when {@code localName} is empty
     * @param atts         attributes of the element (not used)
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) {
        String name = elementName(localName, qName);

        if (name.equalsIgnoreCase("ORDER")) {
            order = new Order();
        }

        if (name.equalsIgnoreCase("DATE")) {
            isDate = true;
            text.setLength(0);
        }

        if (name.equalsIgnoreCase("CUSTOMERID")) {
            isCustomerId = true;
            text.setLength(0);
        }

        if (name.equalsIgnoreCase("ITEM")) {
            isItem = true;
            item = new Item();
        }
    }

    /**
     * Buffers character data while a text-bearing element is open. The value is
     * only converted in {@link #endElement}, once it is known to be complete.
     *
     * @param ch     character buffer supplied by the parser
     * @param start  index of the first character to read
     * @param length number of characters to read
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void characters(char ch[], int start, int length) throws SAXException {
        if (isDate || isCustomerId) {
            text.append(ch, start, length);
        }
    }

    /**
     * Converts the buffered text of the closed element, stores it on the order
     * and lowers the matching flag so that later text is not misread.
     *
     * @param namespaceURI namespace URI of the element (not used)
     * @param localName    local name; empty when the parser is not namespace-aware
     * @param qName        qualified name, used when {@code localName} is empty
     * @throws SAXException declared by the overridden method; not thrown here
     * @throws NumberFormatException if the customer id text is not an integer
     */
    @Override
    public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
        String name = elementName(localName, qName);

        if (isDate && name.equalsIgnoreCase("DATE")) {
            isDate = false;
            SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
            try {
                order.setDate(formatter.parse(text.toString().trim()));
            } catch (ParseException e) {
                // Best effort, as before: report the bad date and keep parsing.
                e.printStackTrace();
            }
        }

        if (isCustomerId && name.equalsIgnoreCase("CUSTOMERID")) {
            isCustomerId = false;
            order.setCustomerId(Integer.valueOf(text.toString().trim()));
        }

        if (isItem && name.equalsIgnoreCase("ITEM")) {
            isItem = false;
        }
    }

    /** Returns the local name, or the qualified name when the local name is absent. */
    private static String elementName(String localName, String qName) {
        return (localName == null || localName.isEmpty()) ? qName : localName;
    }

}

I'm wondering whether there is a way to get rid of these hideous booleans, which keep growing with the number of elements. There must be a better way to parse this relatively simple XML. The amount of code needed for this task already looks ugly.

Currently I'm using a SAX parser, but I'm open to any other suggestions (other than DOM: I can't afford in-memory parsers because I have huge XML files).


Solution

  • Here's an example of using JAXB with StAX.

    Input document:

    <?xml version="1.0" encoding="UTF-8"?>
    <Personlist xmlns="http://example.org">
        <Person>
            <Name>Name 1</Name>
            <Address>
                <StreetAddress>Somestreet</StreetAddress>
                <PostalCode>00001</PostalCode>
                <CountryName>Finland</CountryName>
            </Address>
        </Person>
        <Person>
            <Name>Name 2</Name>
            <Address>
                <StreetAddress>Someotherstreet</StreetAddress>
                <PostalCode>43400</PostalCode>
                <CountryName>Sweden</CountryName>
            </Address>
        </Person>
    </Personlist>
    

    Person.java:

    /**
     * JAXB binding for a single {@code Person} element in the
     * {@code http://example.org} namespace. The fields are filled in by the
     * unmarshaller; the class only exposes read access.
     */
    @XmlRootElement(name = "Person", namespace = "http://example.org")
    public class Person {

        /** Content of the {@code Name} child element. */
        @XmlElement(name = "Name", namespace = "http://example.org")
        private String name;

        /** Content of the {@code Address} child element. */
        @XmlElement(name = "Address", namespace = "http://example.org")
        private Address address;

        /** @return the address bound from the {@code Address} element */
        public Address getAddress() {
            return this.address;
        }

        /** @return the name bound from the {@code Name} element */
        public String getName() {
            return this.name;
        }
    }
    

    Address.java:

    /**
     * JAXB binding for an {@code Address} element in the
     * {@code http://example.org} namespace. All values are kept as plain strings
     * and are exposed read-only.
     */
    public class Address {

        /** Content of the {@code StreetAddress} child element. */
        @XmlElement(name = "StreetAddress", namespace = "http://example.org")
        private String streetAddress;

        /** Content of the {@code PostalCode} child element. */
        @XmlElement(name = "PostalCode", namespace = "http://example.org")
        private String postalCode;

        /** Content of the {@code CountryName} child element. */
        @XmlElement(name = "CountryName", namespace = "http://example.org")
        private String countryName;

        /** @return the country name bound from the {@code CountryName} element */
        public String getCountryName() {
            return this.countryName;
        }

        /** @return the postal code bound from the {@code PostalCode} element */
        public String getPostalCode() {
            return this.postalCode;
        }

        /** @return the street address bound from the {@code StreetAddress} element */
        public String getStreetAddress() {
            return this.streetAddress;
        }
    }
    

    PersonlistProcessor.java:

    /**
     * Streams a {@code Personlist} document with StAX and unmarshals one
     * {@code Person} at a time with JAXB, so the whole document is never held in
     * memory.
     */
    public class PersonlistProcessor {
        private static final String NAMESPACE = "http://example.org";

        /**
         * Processes the {@code personlist.xml} resource located next to this class.
         *
         * @param args ignored
         * @throws IllegalStateException if the resource cannot be found
         * @throws Exception if reading or unmarshalling fails
         */
        public static void main(String[] args) throws Exception {
            // try-with-resources: closing the XMLStreamReader does not close the underlying stream.
            try (InputStream inputStream = PersonlistProcessor.class.getResourceAsStream("personlist.xml")) {
                if (inputStream == null) {
                    throw new IllegalStateException("Resource personlist.xml not found next to PersonlistProcessor");
                }
                new PersonlistProcessor().processPersonlist(inputStream);
            }
        }

        // TODO: Instead of throws Exception, all exceptions should be wrapped
        // inside runtime exception
        /**
         * Reads a {@code Personlist} document from the stream and hands every
         * {@code Person} child to {@link #processPerson(Person)}.
         *
         * <p>The stream itself is not closed here; that is left to the caller.
         *
         * @param inputStream XML document whose root element is {@code Personlist}
         * @throws Exception if the document is malformed, has an unexpected
         *                   structure, or cannot be unmarshalled
         */
        public void processPersonlist(InputStream inputStream) throws Exception {
            JAXBContext jaxbContext = JAXBContext.newInstance(Person.class);
            // Create unmarshaller
            Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();

            XMLInputFactory inputFactory = XMLInputFactory.newFactory();
            // Do not resolve external entities (XXE hardening).
            inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
            XMLStreamReader xss = inputFactory.createXMLStreamReader(inputStream);
            try {
                // Go to next tag
                xss.nextTag();
                // Require Personlist
                xss.require(XMLStreamReader.START_ELEMENT, NAMESPACE, "Personlist");
                // Go to first Person (or to the end of an empty Personlist)
                int event = xss.nextTag();
                while (event == XMLStreamReader.START_ELEMENT) {
                    // Require Person
                    xss.require(XMLStreamReader.START_ELEMENT, NAMESPACE, "Person");
                    // Unmarshall person
                    Person person = (Person) unmarshaller.unmarshal(xss);
                    // Process person
                    processPerson(person);
                    // unmarshal() leaves the reader on the token right after </Person>.
                    // Only advance when that token is not already a tag; otherwise a
                    // <Person> that directly follows </Person> would be skipped.
                    event = xss.getEventType();
                    if (event != XMLStreamReader.START_ELEMENT && event != XMLStreamReader.END_ELEMENT) {
                        event = xss.nextTag();
                    }
                }
                // Require Personlist
                xss.require(XMLStreamReader.END_ELEMENT, NAMESPACE, "Personlist");
            } finally {
                xss.close();
            }
        }

        /** Prints the person's name and the country name of their address. */
        private void processPerson(Person person) {
            System.out.println(person.getName());
            System.out.println(person.getAddress().getCountryName());
        }
    }