Search code examples
javaiofilewriter

When writing a huge amount of data, parts of it get lost / When all data is present, the write process is very slow


I have a problem with the BufferedWriter when writing a large number of strings to a file.

Situation: I have to read a large text file (>100k lines), perform some modifications to each line (remove whitespace, check for optional commands, etc.) and write the modified content to a new file.

I have tried two possibilities to write to the file and get only one of the two following results:

  1. The write process is horribly slow, but all lines are processed
  2. Several chunks of lines are getting munched during the writing process, leaving an incomplete modified result.

Approaches and results:

  1. Horribly slow but complete
// read file content and put it in List<String> fileContent
// Approach 1: the output file is reopened in append mode and closed again for every single line.
for (String line : fileContent)
{
  try(BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename, true))))
    {
      // Hand the current line (not the whole list) to the modifier.
      writer.write(modifyFileContent(line));
    }
}

I already know that opening a file to write one line and closing it again immediately performs very poorly. A modification of a file with around 4M lines takes around 4h or so, which is not desirable. At least, it works...

  2. Faster, but incomplete write
// read file content and put it in List<String> fileContent
// This is placed in a try/catch block, I'm omitting it here for brevity
// Approach 2: one writer is opened in append mode, reused for every line and closed once at the end.
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename, true)));
for (String line : fileContent)
{
  // Hand the current line (not the whole list) to the modifier.
  writer.write(modifyFileContent(line));
}
// close() flushes whatever is still sitting in the buffer.
writer.close();

This works faster, but I get the following content in the result file (I use the line number from the original file for debugging purposes):

...
Very long line with interesting content // line nb 567
Very long line with interesting content // line nb 568
Very long line with interesting content // line nb 569
Very long line wi
Very long line with interesting content // line nb 834
Very long line with interesting content // line nb 835
Very long line with interesting content // line nb 836
...

When printing these strings to the console, I see no gaps in the line numbering! So it seems there is a buffering issue somewhere...

Other approaches: I also tried the NIO version of newBufferedWriter, which also omitted several lines.

Question: What am I missing here? Is there a way, to get a good write performance with correctness here? The input files are usually in the area of several 100MB and Millions of lines... Any hints are much appreciated :)

[edit]

Thanks to Sir Lopez I found a working solution. I never stumbled upon RandomAccessFile before...

Now with this information, I guess I ran into a race condition or something else thread-related... As I started working with threads just recently, I guess this could have been expected...

To give the proper view, I made a minimal example which shows the context in which my problem originally occurred. Any feedback is welcome :) :

package minex;

import java.awt.EventQueue;
import java.awt.event.ActionEvent;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.GroupLayout;
import static javax.swing.GroupLayout.Alignment.BASELINE;
import static javax.swing.GroupLayout.Alignment.LEADING;
import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JProgressBar;
import javax.swing.SwingWorker;
import javax.swing.UIManager;
import javax.swing.WindowConstants;

/**
 * Read a file line by line, modify its content and write it to another file.
 * @author demo
 */
/**
 * Minimal Swing window that lets the user pick a text file and copies it line
 * by line into a sibling {@code <name>.mod} file, appending the zero-based
 * line number to every line while a progress bar tracks the work.
 * @author demo
 */
public class gui extends JFrame {

  /** Button to load a file. */
  private JButton butLoadFile;

  /** Progress bar to visualize progress. */
  private JProgressBar barProgress;

  /**
   * Background task, so the gui isn't blocked and the progress bar can be updated.
   * Publishes the completed fraction (0.0 to 1.0) as intermediate results.
   */
  class fileConversionWorker extends SwingWorker<Integer, Double>
  {
    private final File file;

    /**
     * @param file File to read; the result is written next to it with a
     *             {@code .mod} suffix.
     */
    public fileConversionWorker(File file)
    {
      this.file = file;
    }

    /**
     * Count the lines in the provided file. Needed to set the boundary
     * settings for the progress bar.
     * @param aFile File to read.
     * @return Number of lines present in aFile
     * @throws IOException if the file cannot be opened or read
     */
    private int countLines(File aFile) throws IOException {
      try (LineNumberReader reader = new LineNumberReader(new FileReader(aFile))) {
        while (reader.readLine() != null) {
          // Content is discarded; only the reader's line counter is of interest.
        }
        return reader.getLineNumber();
      }
    }

    /**
     * Reads the file line by line, appends {@code " // <lineNb>"} to each
     * line and writes it to the {@code .mod} file immediately.
     * Runs on the worker thread, so every progress bar change is handed over
     * to the event dispatch thread instead of being applied directly.
     * @return always 0
     */
    @Override
    public Integer doInBackground()
    {
      // Indicate, that something is happening
      EventQueue.invokeLater(() -> barProgress.setIndeterminate(true));

      int counted = 0;
      try {
        counted = countLines(file);
      } catch (IOException ex) {
        Logger.getLogger(gui.class.getName()).log(Level.SEVERE, null, ex);
      }
      final int totalLines = counted;

      // only proceed, when we at least have 1 line to manipulate.
      if (totalLines <= 0)
      {
        return 0;
      }

      EventQueue.invokeLater(() -> {
        barProgress.setMaximum(totalLines);
        barProgress.setIndeterminate(false);
      });

      String filename = file.getAbsolutePath() + ".mod";

      // try-with-resources closes (and thereby flushes) the writer even when
      // reading fails or closing the reader throws.
      // NOTE: the output is opened in append mode, so an already existing
      // .mod file is extended rather than replaced.
      try (BufferedReader br = new BufferedReader(new FileReader(file));
           BufferedWriter writer = new BufferedWriter(
               new OutputStreamWriter(new FileOutputStream(filename, true))))
      {
        long lineNb = 0;
        int lastPercent = -1;
        String line;
        // Read original file, modify line and immediately write to new file
        while ((line = br.readLine()) != null)
        {
          writer.write(line + " // " + lineNb);
          writer.newLine();
          lineNb++;

          // Publish only when the whole-number percentage changes, so a file
          // with millions of lines does not queue millions of updates.
          double fraction = progressFraction(lineNb, totalLines);
          int percent = (int) (fraction * 100);
          if (percent != lastPercent)
          {
            lastPercent = percent;
            publish(fraction);
          }
        }
      } catch (IOException ex) {
        Logger.getLogger(gui.class.getName()).log(Level.SEVERE, null, ex);
      }
      return 0;
    }

    /**
     * Called on the event dispatch thread once the background work has
     * finished: logs a failure of the background task, stops the
     * indeterminate animation and re-enables the load button.
     */
    @Override
    public void done()
    {
      try {
        // get() rethrows anything doInBackground() failed with.
        get();
      } catch (InterruptedException ex) {
        Thread.currentThread().interrupt();
      } catch (java.util.concurrent.ExecutionException ex) {
        Logger.getLogger(gui.class.getName()).log(Level.SEVERE, null, ex);
      } finally {
        barProgress.setIndeterminate(false);
        butLoadFile.setEnabled(true);
      }
    }

    /**
     * Update the progress bar with the most recent published fraction.
     * @param aDoubles Published fractions; only the last one is used.
     */
    @Override
    protected void process(java.util.List<Double> aDoubles) {
      double latest = aDoubles.get(aDoubles.size() - 1);
      int amount = barProgress.getMaximum() - barProgress.getMinimum();
      barProgress.setValue((int) (barProgress.getMinimum() + (amount * latest)));
    }
  }

  /**
   * Completed share of the work as a value between 0.0 and 1.0.
   * The division is done in floating point; dividing the two integers first
   * would truncate every intermediate result to 0.
   * @param linesDone Number of lines handled so far.
   * @param totalLines Number of lines expected in total.
   * @return {@code linesDone / totalLines} clamped to [0.0, 1.0], or 0.0 when
   *         totalLines is not positive
   */
  static double progressFraction(long linesDone, long totalLines)
  {
    if (totalLines <= 0)
    {
      return 0.0;
    }
    double fraction = (double) linesDone / totalLines;
    return Math.max(0.0, Math.min(1.0, fraction));
  }

  /**
   * Start the gui.
   * @param args Command line arguments (unused).
   */
  public static void main(String[] args)
  {
    EventQueue.invokeLater(() -> new gui().setVisible(true));
  }

  /**
   * Start the gui. Kept for existing callers; delegates to
   * {@link #main(String[])}.
   */
  public static void main()
  {
    main(new String[0]);
  }

  /**
   * Initialize all things needed.
   */
  public gui()
  {
    initComponents();
  }

  /**
   * Let the user choose a file and immediately begin processing it.
   * The load button stays disabled until the worker reports completion.
   * @param evt Event that triggered the action (unused).
   */
  private void butLoadFileActionListener(ActionEvent evt)
  {
    javax.swing.JFileChooser fc = new javax.swing.JFileChooser("/home/demo/fileFolder");
    int returnVal = fc.showOpenDialog(gui.this);

    if (returnVal == JFileChooser.APPROVE_OPTION) {
      File file = fc.getSelectedFile();
      butLoadFile.setEnabled(false);
      fileConversionWorker worker = new fileConversionWorker(file);
      worker.execute();
    }
  }

  /**
   * Create the button and the progress bar and lay them out in the frame.
   */
  private void initComponents()
  {
    setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
    setResizable(false);
    setTitle("Min Example");

    butLoadFile = new JButton("Load file");
    butLoadFile.addActionListener(this::butLoadFileActionListener);

    barProgress = new JProgressBar();
    barProgress.setStringPainted(true);
    barProgress.setMinimum(0);

    javax.swing.GroupLayout layout = new GroupLayout(getContentPane());
    getContentPane().setLayout(layout);

    layout.setHorizontalGroup(
    layout.createParallelGroup(LEADING)
            .addComponent(butLoadFile, GroupLayout.PREFERRED_SIZE, 200, GroupLayout.PREFERRED_SIZE)
            .addComponent(barProgress, GroupLayout.PREFERRED_SIZE, 200, GroupLayout.PREFERRED_SIZE)
    );

    layout.setVerticalGroup(
    layout.createParallelGroup(BASELINE)
            .addGroup(layout.createSequentialGroup()
            .addComponent(butLoadFile, GroupLayout.PREFERRED_SIZE, 20, GroupLayout.PREFERRED_SIZE)
            .addComponent(barProgress, GroupLayout.PREFERRED_SIZE, 20, GroupLayout.PREFERRED_SIZE)
            )
    );

    pack();
  }
}

[/edit]


Solution

  • Maybe this can help you

    Fastest way to write huge data in text file Java

    https://www.quora.com/How-do-to-read-and-write-large-size-file-in-Java-efficiently