Search code examples
javaxmlparsingsaxsaxparser

Sax Parser - Unable to split XML file to specified size


I'm stuggling with some difficulties how to read xml and split it to multiple files using SAX Parser. Consider that we have on input following generated xml:

<?xml version="1.0" encoding="utf-8"?>
<record-table>
  <record>
    <record_id>12345</record_id>
    <record_rows>
      <record_row>str1234</record_row>
    </record_rows>
  </record>
  <footer>
    <record_count>12345</record_count>
    <record_row_count>12345</record_row_count>
  </footer>
</record-table>

To make it clean and sweet, I made the "TODO" list:

XML splitting:
 * Splits file generated by XML generation functionality in multiple files of configurable size.
 * Asks the user XML file location.
 * Asks the user maximum single file size in bytes.
 * Each split file must conform to schema.
 * Elements record_count and record_row_count should contain actual numbers for each file.
 * Files should be split as close to specified limit as possible.

As of this moment, i made multiple attempts to read it, the program executes but no action is made.

Draft code:

    public static void splitXML(File fileToSplit, int splitFileSize) {
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser parser = factory.newSAXParser();
        XMLReader reader = parser.getXMLReader();

        reader.parse(new InputSource(new FileInputStream(fileToSplit)));
        reader.setContentHandler(new DefaultHandler() {

            public static final String DIRECTORY = "target/results";

            private int fileSize = 0;

            private File fileLocation;

            // counts number of files created
            private int fileCount = 0;

            // counts characters to decide where to split file
            private long charCount = 0;
            // data line buffer (is reset when the file is split)
            private StringBuilder recordRowDataLines = new StringBuilder();

            // temporary variables used for the parser events
            private String currentElement = null;
            private String currentRecordId = null;
            private String currentRecordRowData = null;

            public final long TAG_CHAR_SIZE = 5;

            @Override
            public void startDocument() throws SAXException {
                File directory = new File(DIRECTORY);
                if(!directory.exists())
                    directory.mkdir();
            }

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                currentElement = qName;
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                if(qName.equals("record_rows")) {
                    try {
                        savePatch();
                    } catch (IOException e) {
                        throw new SAXException(e);
                    }
                }
                if (qName.equals("record_row")) { // one record finished - save in buffer & calculate size so far
                    charCount += tagSize("record_row");
                    recordRowDataLines.append("<record_row>")
                            .append(currentRecordRowData)
                            .append("</record_row>");
                    if (charCount >= fileSize) { // if max size was reached, save what was read so far in a new file
                        try {
                            savePatch();
                        } catch (IOException ex) {
                            throw new SAXException(ex);
                        }
                    }
                }
                currentElement = null;
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                System.out.println(new String(ch, start, length));
                if (currentElement == null) {
                    return;
                }
                if (currentElement.equals("record_id")) {
                    currentRecordId = new String(ch, start, length);
                }
                if (currentElement.equals("record_row")) {
                    currentRecordRowData = new String(ch, start, length);
                    charCount += currentRecordRowData.length(); // storing size so far
                }
            }

            public long tagSize(String tagName) {
                return TAG_CHAR_SIZE + tagName.length() * 2; // size of text + tags
            }

            public void savePatch() throws IOException {
                ++fileCount;
                StringBuilder stringBuilder = new StringBuilder();
                stringBuilder.append("<record part='")
                        .append(fileCount)
                        .append("'><record_id>")
                        .append(currentRecordId)
                        .append("</record_id>")
                        .append("<record_rows>")
                        .append(recordRowDataLines)
                        .append("</record_rows></record>");
                File fragment = new File(DIRECTORY, "data_part_" + fileCount + ".xml");
                System.out.println("File " + fragment.getAbsolutePath() + "has been saved!");

                try(FileWriter out = new FileWriter(fragment)){
                    out.write(stringBuilder.toString());
                } catch (Exception e) {
                    e.printStackTrace();
                }

                //flush current information that was saved.
                recordRowDataLines = new StringBuilder();
                charCount = 0;
            }
        });

    } catch (ParserConfigurationException | SAXException | IOException e) {
        e.printStackTrace();
    }
}

How looks main class:

public class Main {

    public static void main(String[] args) {
        System.out.println("Welcome!");

        <omitted>
        File f = CommonUtils.requestFilePath();
        int fileSize = CommonUtils.requestUserValueInt("Enter file split size : ");
        XMLSplitter.splitXML(f, fileSize);
    }
}

May you see, what i cant. Please help.


Solution

  • You should call the setContentHandler before the parse.