How to export huge result set from database into several csv files and zip them on the fly?

I need to create a REST controller which extracts data from a database and writes it into CSV files that will ultimately be zipped together. Each CSV file should contain exactly 10 lines. Eventually, all CSV files should be zipped into a single zip file. I want everything to happen on the fly, meaning that saving files to a temporary location on the disk is not an option. Can someone provide me with an example?

Solution

I found a very nice piece of code that exports a huge number of rows from a database into several CSV files and zips them. I think it can assist a lot of developers. I have tested the solution, and you can find the entire example at: https://github.com/idaamit/stream-from-db/tree/master The controller is:

@GetMapping(value = "/employees/{employeeId}/cars") @ResponseStatus(HttpStatus.OK) public ResponseEntity<StreamingResponseBody> getEmployeeCars(@PathVariable  int employeeId) {
    log.info("Going to export cars for employee {}", employeeId);
    String zipFileName = "Cars Of Employee - " + employeeId;
    return ResponseEntity.ok()
            .header(HttpHeaders.CONTENT_TYPE, "application/zip")
            .header(HttpHeaders.CONTENT_DISPOSITION, "attachment;filename=" + zipFileName + ".zip")
            .body(
                    employee.getCars(dataSource, employeeId));

The Employee class first checks whether we need to prepare more than one CSV file:

/**
 * Exports the cars of an employee as a ZIP of CSV files, streamed straight from
 * the database query into the HTTP response.
 */
public class Employee {

    /**
     * Builds the streaming body that runs the cars query and zips the result.
     *
     * <p>Nothing is queried when this method returns; the count query and the
     * data query both run when the returned body's {@code writeTo} is invoked.
     *
     * @param dataSource data source the queries are executed against
     * @param employeeId id of the employee whose cars are exported
     * @return a body that writes the ZIP archive to the response output stream
     */
    public StreamingResponseBody getCars(BasicDataSource dataSource, int employeeId) {
        return outputStream -> {
            JdbcTemplate jdbcTemplate = new JdbcTemplate(dataSource);
            String sqlQuery = "SELECT [Id], [employeeId],  [type], [text1] " +
                    "FROM Cars " +
                    "WHERE EmployeeID=? ";
            PreparedStatementSetter preparedStatementSetter =
                    preparedStatement -> preparedStatement.setInt(1, employeeId);
            StreamingZipResultSetExtractor zipExtractor = new StreamingZipResultSetExtractor(
                    outputStream, employeeId, isMoreThanOneFile(jdbcTemplate, employeeId));
            // The extractor's return value is not needed here; it writes to outputStream as a side effect.
            jdbcTemplate.query(sqlQuery, preparedStatementSetter, zipExtractor);
        };
    }

    /**
     * Tells whether the export will be split into several CSV files.
     *
     * <p>Uses {@code >=} because the extractor starts its line counter at 1 for
     * the header row, so a file rolls over once the header plus data rows reach
     * {@link StreamingZipResultSetExtractor#MAX_ROWS_IN_CSV} lines.
     */
    private boolean isMoreThanOneFile(JdbcTemplate jdbcTemplate, int employeeId) {
        return getCount(jdbcTemplate, employeeId) >= StreamingZipResultSetExtractor.MAX_ROWS_IN_CSV;
    }

    /**
     * Counts the cars of the given employee.
     *
     * @return the number of matching rows; 0 if the query yields SQL NULL
     */
    private int getCount(JdbcTemplate jdbcTemplate, int employeeId) {
        String sqlQuery = "SELECT count([Id]) " +
                "FROM Cars " +
                "WHERE EmployeeID=? ";

        // Varargs overload instead of the deprecated (String, Object[], Class) one.
        Integer count = jdbcTemplate.queryForObject(sqlQuery, Integer.class, employeeId);
        // Guard the unboxing: queryForObject is declared nullable.
        return count == null ? 0 : count;
    }

}

The StreamingZipResultSetExtractor class is responsible for splitting the streamed CSV data into several files and zipping them.

/**
 * Reads the CSV lines produced by {@link StreamingCsvResultSetExtractor} and
 * writes them into a {@link ZipOutputStream}, starting a new CSV entry (with the
 * header repeated) every {@link #MAX_ROWS_IN_CSV} lines, header included.
 */
@Slf4j
public class StreamingZipResultSetExtractor implements ResultSetExtractor<Integer> {
    /** Flush the zip stream every this many counted lines. */
    private final static int CHUNK_SIZE = 100000;
    /** Maximum number of lines per CSV entry, counting the header line. */
    public final static int MAX_ROWS_IN_CSV = 10;
    private final OutputStream outputStream;
    private final int employeeId;
    private final StreamingCsvResultSetExtractor streamingCsvResultSetExtractor;
    /** When true, every entry name gets a " Part N" suffix. */
    private final boolean isInteractionCountExceedsLimit;
    /** Number of zip entries created so far. */
    private int fileCount = 0;

    /**
     * @param outputStream                   target stream the zip archive is written to
     * @param employeeId                     employee whose cars are exported; used in entry names and logs
     * @param isInteractionCountExceedsLimit whether more than one CSV entry is expected
     */
    public StreamingZipResultSetExtractor(OutputStream outputStream, int employeeId, boolean isInteractionCountExceedsLimit) {
        this.outputStream = outputStream;
        this.employeeId = employeeId;
        this.streamingCsvResultSetExtractor = new StreamingCsvResultSetExtractor(employeeId);
        this.isInteractionCountExceedsLimit = isInteractionCountExceedsLimit;
    }

    /**
     * Streams the result set into the zip archive.
     *
     * <p>I/O failures are logged and end the export; they are not rethrown.
     *
     * @param resultSet rows to export
     * @return the internal line counter: data rows written plus one per CSV entry created
     */
    @Override
    @SneakyThrows
    public Integer extractData(ResultSet resultSet) throws DataAccessException {
        log.info("Creating thread to extract data as zip file for employeeId {}", employeeId);
        int lineCount = 1; //+1 for header row
        // NOTE(review): the CSV producer is started inside extractData(...) below, before the
        // pipe is connected on the next line - verify it cannot write before the connection exists.
        try (PipedOutputStream internalOutputStream = streamingCsvResultSetExtractor.extractData(resultSet);
             PipedInputStream pipedInputStream = new PipedInputStream(internalOutputStream);
             BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(pipedInputStream))) {

            String headerLine = bufferedReader.readLine();
            String header;
            if (headerLine == null) {
                // The producer ended without sending anything; do not write the text "null" as a header.
                log.warn("No csv header received for task {}", employeeId);
                header = "";
            } else {
                header = headerLine + "\n";
            }

            try (ZipOutputStream zipOutputStream = new ZipOutputStream(outputStream)) {
                createFile(employeeId, zipOutputStream, header);
                String currentLine;
                while ((currentLine = bufferedReader.readLine()) != null) {
                    if (lineCount % MAX_ROWS_IN_CSV == 0) {
                        // Current entry is full: start the next one and count its header line.
                        zipOutputStream.closeEntry();
                        createFile(employeeId, zipOutputStream, header);
                        lineCount++;
                    }
                    lineCount++;
                    currentLine += "\n";
                    // NOTE(review): getBytes() uses the platform default charset, as does the reader above.
                    zipOutputStream.write(currentLine.getBytes());
                    if (lineCount % CHUNK_SIZE == 0) {
                        zipOutputStream.flush();
                    }
                }
            }
        } catch (IOException e) {
            log.error("Task {} could not zip search results", employeeId, e);
        }

        // lineCount = data rows + one header per entry, so subtracting fileCount yields the data rows.
        log.info("Finished zipping all lines to {} file\\s - total of {} lines of data for task {}", fileCount, lineCount - fileCount, employeeId);
        return lineCount;
    }

    /**
     * Opens the next CSV entry in the archive and writes the header line into it.
     *
     * @throws IOException if the entry cannot be created or the header cannot be written;
     *                     propagated so the caller stops writing to a broken stream
     */
    private void createFile(int employeeId, ZipOutputStream zipOutputStream, String header) throws IOException {
        fileCount++;
        String fileName = "Cars for Employee - " + employeeId;
        // Also suffix when a rollover happens although a single file was expected;
        // otherwise the second entry would reuse the first entry's name.
        if (isInteractionCountExceedsLimit || fileCount > 1) {
            fileName += " Part " + fileCount;
        }
        zipOutputStream.putNextEntry(new ZipEntry(fileName + ".csv"));
        zipOutputStream.write(header.getBytes());
    }

}

The StreamingCsvResultSetExtractor class is responsible for transferring the data from the result set into CSV format. There is more work to do to handle special characters that are problematic in a CSV cell.

@Slf4j
public class StreamingCsvResultSetExtractor implements ResultSetExtractor<PipedOutputStream> {
    private final static int CHUNK_SIZE = 100000;
    private PipedOutputStream pipedOutputStream;
    private final int employeeId;
    public StreamingCsvResultSetExtractor(int employeeId) {
        this.employeeId = employeeId;
    }
    @SneakyThrows
    @Override
    public PipedOutputStream extractData(ResultSet resultSet) throws DataAccessException {
        log.info("Creating thread to extract data as csv and save to file for task {}", employeeId);
        this.pipedOutputStream = new PipedOutputStream();
        ExecutorService executor = Executors.newSingleThreadExecutor();
        executor.submit(() -> {
            prepareCsv(resultSet);
        });

        return pipedOutputStream;
    }

    @SneakyThrows
    private Integer prepareCsv(ResultSet resultSet) {
        int interactionsSent = 1;
        log.info("starting to extract data to csv lines");
        streamHeaders(resultSet.getMetaData());
        StringBuilder csvRowBuilder = new StringBuilder();
        try {
            int columnCount = resultSet.getMetaData().getColumnCount();
            while (resultSet.next()) {
                for (int i = 1; i < columnCount + 1; i++) {
                    if(resultSet.getString(i) != null && resultSet.getString(i).contains(",")){
                            String strToAppend = "\"" + resultSet.getString(i) + "\"";
                            csvRowBuilder.append(strToAppend);
                        } else {
                            csvRowBuilder.append(resultSet.getString(i));
                    }
                    csvRowBuilder.append(",");
                }
                int rowLength = csvRowBuilder.length();
                csvRowBuilder.replace(rowLength - 1, rowLength, "\n");

                pipedOutputStream.write(csvRowBuilder.toString().getBytes());
                interactionsSent++;
                csvRowBuilder.setLength(0);
                if (interactionsSent % CHUNK_SIZE == 0) {
                    pipedOutputStream.flush();
                }
            }
        } finally {
            pipedOutputStream.flush();
            pipedOutputStream.close();
        }

        log.debug("Created all csv lines for Task {} - total of {} rows", employeeId, interactionsSent);
        return interactionsSent;
    }

    @SneakyThrows
    private void streamHeaders(ResultSetMetaData resultSetMetaData) {
        StringBuilder headersCsvBuilder = new StringBuilder();

        for (int i = 1; i < resultSetMetaData.getColumnCount() + 1; i++) {
            headersCsvBuilder.append(resultSetMetaData.getColumnLabel(i)).append(",");
        }
        int rowLength = headersCsvBuilder.length();
        headersCsvBuilder.replace(rowLength - 1, rowLength, "\n");

        pipedOutputStream.write(headersCsvBuilder.toString().getBytes());
    }

}

In order to test this, send a GET request to http://localhost:8080/stream-demo/employees/3/cars