Search code examples
javaxmlstax

How do I remove elements and their children from Xml using Java and Stax


I have a large file containing data like this

<releases>
   <release>
   ......
      <companies>
         <company>
         </company>
      </companies>
   </release>
   <release>
   ......
</releases>

I want to produce an output file that is the same as the input file but with all the companies elements and their children removed. I'm trying to use StAX. I thought that it iterated down through the elements, and that if I got a match I could simply not write that element and the whole section would be removed, i.e. that throwing away an element would also remove the elements within it. But it seems that it just removes the element itself and nothing else; is that right?

i.e

<releases>
   <release>
   ......
   </release>
   <release>
   ......
</releases>

This is the code I currently have:

        // Copies the XML events read from `source` to `target`, trying to leave out
        // the <companies> elements (this is the attempt that does not work as intended).
        XMLInputFactory  inputFactory = XMLInputFactory.newInstance();
        XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
        InputStream in = new FileInputStream(source);
        XMLEventReader reader = inputFactory.createXMLEventReader(in);

        OutputStream out = new FileOutputStream(target);
        XMLEventWriter writer =  outputFactory.createXMLEventWriter(out);
        XMLEvent event;
        while(reader.hasNext()){
            event = reader.nextEvent();
            // NOTE: the event is written here unconditionally, before the check below,
            // so the `continue` for "companies" only skips the second add further down.
            writer.add(event);
            if(event.getEventType() == XMLStreamConstants.START_ELEMENT)
            {
                // Only the START_ELEMENT event itself is matched; the events for the
                // element's children and its END_ELEMENT are not covered by this test.
                if(event.asStartElement().getName().toString().equalsIgnoreCase("companies"))
                {
                    System.out.println("Deleting:"+event);
                    continue;
                }
                else
                {
                    // Second add of the same event (it was already added above).
                    writer.add(event);
                }
            }
            else
            {
                // Second add of the same event (it was already added above).
                writer.add(event);
            } 
        }

Solution

  • I think I have it now: StAX processes the document event by event, so I needed to note when the starting element is found and set a variable indicating that all subsequent data should be ignored until the ending element is reached.

    Code Example:

    import javax.xml.stream.*;
    import javax.xml.stream.events.XMLEvent;
    import java.io.*;
    
    /**
     * Command-line tool that copies an XML file to {@code <file>.new}, leaving out
     * every {@code companies} element together with everything nested inside it.
     */
    public class FixDb
    {
        /** Name of the element whose whole subtree is dropped (compared ignoring case). */
        private static final String ELEMENT_TO_REMOVE = "companies";

        /**
         * Entry point.
         *
         * @param args {@code args[0]} is the path of the XML file to filter; the
         *             result is written next to it with a {@code .new} suffix
         * @throws Exception if the file cannot be read or written, or is not
         *                   well-formed XML
         */
        public static void main(String[] args) throws Exception
        {
            if(args.length < 1)
            {
                System.out.println("Usage: FixDb <xml-file>");
                return;
            }

            File source = new File(args[0]);
            if(!source.exists())
            {
                System.out.println("File:"+source+ " does not exist");
                // Stop here; opening a missing file below would only throw.
                return;
            }

            File target = new File(source+".new");

            InputStream in = new FileInputStream(source);
            try
            {
                OutputStream out = new FileOutputStream(target);
                try
                {
                    copyWithoutElement(in, out, ELEMENT_TO_REMOVE);
                }
                finally
                {
                    out.close();
                }
            }
            finally
            {
                in.close();
            }
        }

        /**
         * Streams the XML document from {@code in} to {@code out}, skipping every
         * element named {@code elementName} and all events between its start and
         * end tags.
         *
         * @param in          source XML document
         * @param out         destination for the filtered document
         * @param elementName element name to drop, compared ignoring case
         * @throws XMLStreamException if the input cannot be parsed or the output
         *                            cannot be written
         */
        private static void copyWithoutElement(InputStream in, OutputStream out, String elementName)
                throws XMLStreamException
        {
            XMLEventReader reader = XMLInputFactory.newInstance().createXMLEventReader(in);
            try
            {
                XMLEventWriter writer = XMLOutputFactory.newInstance().createXMLEventWriter(out);
                try
                {
                    // Number of currently open elements named elementName. A counter
                    // rather than a flag, so a matching element nested inside another
                    // one does not end the skipped section at the inner end tag.
                    int skipDepth = 0;
                    while(reader.hasNext())
                    {
                        XMLEvent event = reader.nextEvent();
                        // QName.toString() is "{namespaceURI}localPart" for namespaced
                        // names, so only elements without a namespace URI match.
                        if(event.isStartElement()
                                && event.asStartElement().getName().toString().equalsIgnoreCase(elementName))
                        {
                            skipDepth++;
                        }
                        else if(event.isEndElement()
                                && event.asEndElement().getName().toString().equalsIgnoreCase(elementName))
                        {
                            if(skipDepth > 0)
                            {
                                skipDepth--;
                            }
                        }
                        else if(skipDepth == 0)
                        {
                            writer.add(event);
                        }
                    }
                    // Push any buffered output to the stream before it is closed.
                    writer.flush();
                }
                finally
                {
                    writer.close();
                }
            }
            finally
            {
                reader.close();
            }
        }
    }