I have a very small XML file, which I just want to read in Java. After referring to this post, I decided to go with a SAX parser. My XML file looks like this:
<?xml version="1.0" encoding="UTF-8"?>
<catalog>
<library name="Central Library">
<read>
<book id="001" lang="ENG" title="Operating System Concepts" author="Silberschatz" />
<book id="002" lang="ENG" title="Design Patterns: Elements of Reusable Object-Oriented Software" author="Gangs of Four" />
</read>
<unread>
<book id="003" lang="ENG" title="Introduction to Algorithms" author="Cormen" />
<book id="004" lang="ENG" title="Computer networks" author="Tanenbaum" />
</unread>
</library>
</catalog>
While reading this XML, I am unable to tell read books from unread books. Below is the parser code:
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that builds a {@link Library} from a catalog XML document.
 *
 * <p>The document is parsed inside the constructor; the outcome is then available
 * through {@link #getLibrary()}.
 */
public class ParseXML extends DefaultHandler {

    /** Library created when a {@code <library>} element is encountered. */
    private Library lib;

    /**
     * Creates the handler and parses the given document straight away.
     *
     * @param file location of the XML document, handed to the SAX parser unchanged
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException if the parser reports an error
     * @throws IOException if the document cannot be read
     */
    public ParseXML(String file) throws ParserConfigurationException, SAXException, IOException {
        parse(file);
    }

    /** Runs a freshly created SAX parser over {@code file} with this object as handler. */
    private void parse(String file) throws ParserConfigurationException, SAXException, IOException {
        final SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
        saxParser.parse(file, this);
    }

    /**
     * Reacts to {@code <library>} and {@code <book>} start tags; every other element
     * is ignored.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (qName.equals("library")) {
            lib = new Library(attributes.getValue("name"));
            return;
        }
        if (!qName.equals("book")) {
            return;
        }
        final Book parsed = toBook(attributes);
        // Open question: nothing here tells whether the enclosing element is <read>
        // or <unread>, so every book currently ends up in the read list.
        lib.addIntoReadBooks(parsed);
    }

    /** Builds a book from the id, lang, title and author attributes of a book element. */
    private static Book toBook(Attributes attributes) {
        return new Book(
                attributes.getValue("id"),
                attributes.getValue("lang"),
                attributes.getValue("title"),
                attributes.getValue("author"));
    }

    /**
     * @return the library built while parsing, or {@code null} if no
     *         {@code <library>} element was seen
     */
    public Library getLibrary() {
        return lib;
    }

    /** Parses {@code repository/books.xml} and prints the resulting library. */
    public static void main(String[] args) {
        try {
            final Library parsedLibrary = new ParseXML("repository/books.xml").getLibrary();
            System.out.println("Library=" + parsedLibrary);
        } catch (ParserConfigurationException e) {
            reportError(e);
        } catch (SAXException e) {
            reportError(e);
        } catch (IOException e) {
            reportError(e);
        }
    }

    /** Writes the failure message to standard error. */
    private static void reportError(Exception failure) {
        System.err.println("Error " + failure.getMessage());
    }
}
The Library and Book classes are as follows:
Library.java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * A named library holding two separate lists of books: those already read and
 * those still unread.
 */
public final class Library {

    private final String name;
    private final List<Book> readBooks;
    private final List<Book> unreadBooks;

    /**
     * Creates an empty library.
     *
     * @param name the library's name
     */
    public Library(String name) {
        this.name = name;
        readBooks = new ArrayList<Book>();
        unreadBooks = new ArrayList<Book>();
    }

    /**
     * Appends a book to the list of read books.
     *
     * @param book the book to add
     */
    public void addIntoReadBooks(Book book) {
        readBooks.add(book);
    }

    /**
     * Appends a book to the list of unread books.
     *
     * @param book the book to add
     */
    public void addIntoUnreadBooks(Book book) {
        unreadBooks.add(book);
    }

    /**
     * @return the library's name
     */
    public String getName() {
        return name;
    }

    /**
     * @return the live list of read books; additions made through the returned
     *         list are visible in this library
     */
    public List<Book> getReadBooks() {
        return readBooks;
    }

    /**
     * @return the live list of unread books; additions made through the returned
     *         list are visible in this library
     */
    public List<Book> getUnreadBooks() {
        return unreadBooks;
    }
}
Book.java
/**
 * Immutable description of a single book as it appears in the catalog: its id,
 * language code, title and author.
 */
public class Book {

    private final String id;
    private final String lang;
    private final String title;
    private final String author;

    /**
     * Creates a book from its four catalog attributes.
     *
     * @param id the book's identifier
     * @param lang the book's language code
     * @param title the book's title
     * @param author the book's author
     */
    public Book(String id, String lang, String title, String author) {
        this.id = id;
        this.lang = lang;
        this.title = title;
        this.author = author;
    }

    /**
     * @return the book's identifier
     */
    public String getId() {
        return id;
    }

    /**
     * @return the book's language code
     */
    public String getLang() {
        return lang;
    }

    /**
     * @return the book's title
     */
    public String getTitle() {
        return title;
    }

    /**
     * @return the book's author
     */
    public String getAuthor() {
        return author;
    }
}
How do I decide which list a book should be added to?
Somehow I managed to get by with the following code, which uses two flags to keep track of the visited nodes:
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that builds a {@link Library} from a catalog XML document and uses
 * two flags to remember whether the parser is currently inside a {@code <read>}
 * or an {@code <unread>} element.
 */
public class ParseXML extends DefaultHandler {

    /** Library created when a {@code <library>} element is encountered. */
    private Library lib;

    /** Set on {@code <read>}, cleared on {@code </read>}. */
    private boolean read;

    /** Set on {@code <unread>}, cleared on {@code </unread>}. */
    private boolean unread;

    /**
     * Creates the handler and parses the given document straight away.
     *
     * @param file location of the XML document, handed to the SAX parser unchanged
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException if the parser reports an error
     * @throws IOException if the document cannot be read
     */
    public ParseXML(String file) throws ParserConfigurationException, SAXException, IOException {
        parse(file);
    }

    /** Runs a freshly created SAX parser over {@code file} with this object as handler. */
    private void parse(String file) throws ParserConfigurationException, SAXException, IOException {
        final SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
        saxParser.parse(file, this);
    }

    /** Clears the matching flag when a {@code <read>} or {@code <unread>} element closes. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equals("read")) {
            read = false;
        } else if (qName.equals("unread")) {
            unread = false;
        }
    }

    /**
     * Creates the library, raises the section flags, and files each book according
     * to the flags that are set when its start tag is seen.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (qName.equals("library")) {
            lib = new Library(attributes.getValue("name"));
        } else if (qName.equals("read")) {
            read = true;
        } else if (qName.equals("unread")) {
            unread = true;
        } else if (qName.equals("book")) {
            fileBook(toBook(attributes));
        }
    }

    /** Builds a book from the id, lang, title and author attributes of a book element. */
    private static Book toBook(Attributes attributes) {
        return new Book(
                attributes.getValue("id"),
                attributes.getValue("lang"),
                attributes.getValue("title"),
                attributes.getValue("author"));
    }

    /**
     * Adds the book to the list selected by the flags. When neither flag or both
     * flags are set, the book is not added to any list.
     */
    private void fileBook(Book parsed) {
        if (read == unread) {
            return;
        }
        if (read) {
            lib.addIntoReadBooks(parsed);
        } else {
            lib.addIntoUnreadBooks(parsed);
        }
    }

    /**
     * @return the library built while parsing, or {@code null} if no
     *         {@code <library>} element was seen
     */
    public Library getLibrary() {
        return lib;
    }
}
Is there a better solution for this?
It's not clear to me that this is necessarily better, but you could maintain a currentList
field in your ParseXML
class for the current list to manipulate:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that builds a {@link Library} from a catalog XML document by
 * tracking the list that {@code <book>} elements should currently be added to.
 */
public class ParseXML extends DefaultHandler {

    /** Library created when a {@code <library>} element is encountered. */
    private Library lib;

    /** Catch-all list for books that appear outside {@code <read>} and {@code <unread>}. */
    private final List<Book> extraBooks = new ArrayList<Book>();

    /** List that the next {@code <book>} is appended to; never {@code null}. */
    private List<Book> currentList = extraBooks;

    /**
     * Creates the handler and parses the given document straight away.
     *
     * @param file location of the XML document, handed to the SAX parser unchanged
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException if the parser reports an error, or if a {@code <read>} or
     *         {@code <unread>} element appears before any {@code <library>} element
     * @throws IOException if the document cannot be read
     */
    public ParseXML(String file) throws ParserConfigurationException, SAXException, IOException {
        parse(file);
    }

    /** Runs a freshly created SAX parser over {@code file} with this object as handler. */
    private void parse(String file) throws ParserConfigurationException, SAXException, IOException {
        final SAXParserFactory factory = SAXParserFactory.newInstance();
        final SAXParser parser = factory.newSAXParser();
        parser.parse(file, this);
    }

    /** Falls back to the catch-all list once a {@code <read>} or {@code <unread>} element closes. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equals("read") || qName.equals("unread")) {
            currentList = extraBooks;
        }
    }

    /**
     * Creates the library, switches the current list on {@code <read>} and
     * {@code <unread>}, and appends each {@code <book>} to the current list.
     *
     * @throws SAXException if {@code <read>} or {@code <unread>} is seen before any
     *         {@code <library>} element
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (qName.equals("library")) {
            String name = attributes.getValue("name");
            lib = new Library(name);
        } else if (qName.equals("read")) {
            currentList = requireLibrary(qName).getReadBooks();
        } else if (qName.equals("unread")) {
            currentList = requireLibrary(qName).getUnreadBooks();
        } else if (qName.equals("book")) {
            String id = attributes.getValue("id");
            String lang = attributes.getValue("lang");
            String title = attributes.getValue("title");
            String author = attributes.getValue("author");
            currentList.add(new Book(id, lang, title, author));
        }
    }

    /**
     * Returns the library, failing with a declared parse error instead of a
     * NullPointerException when no {@code <library>} element has been seen yet.
     */
    private Library requireLibrary(String qName) throws SAXException {
        if (lib == null) {
            throw new SAXException("<" + qName + "> encountered before any <library> element");
        }
        return lib;
    }

    /**
     * @return the library built while parsing, or {@code null} if no
     *         {@code <library>} element was seen
     */
    public Library getLibrary() {
        return lib;
    }
}
As a mildly different approach, you could of course keep both lists and the library name as fields in your handler, and create the Library
in the endElement
method using all the data collected.
Note: (obsolete) In this code I used the field extraBooks
to allow for <book>
tags occurring outside the <read>
and <unread>
tags. It's just a little safer than just setting currentList
to null
in the endElement
method. I hate NullPointerException
.
Update:
You don't need the throwaway field extraBooks
.
You can accomplish the same null-safety by changing the initialization of currentList
to
// Starts out as a throwaway list, so a <book> outside <read>/<unread> is added to it instead of hitting null.
private List<Book> currentList = new ArrayList<Book>();
and the endElement
method to
/**
 * Replaces the current list with a fresh throwaway list once a {@code <read>} or
 * {@code <unread>} element closes, so later books are not added to the list of
 * the section that just ended.
 */
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
    final boolean closesSection = qName.equals("read") || qName.equals("unread");
    if (closesSection) {
        currentList = new ArrayList<Book>();
    }
}