Search code examples
springcsvspring-batchspring-batch-admin

How to skip blank lines in CSV using FlatFileItemReader and chunks


I am processing CSV files using FlatFileItemReader.

Sometimes I am getting blank lines within the input file.

When that happens, the whole step stops. I want to skip those lines and proceed normally.

I tried to add an exception handler to the step in order to catch the exception instead of having the whole step stopped:

@Bean
    public Step processSnidUploadedFileStep() {
        // Chunk-oriented step that reads the uploaded SNID CSV file.
        // NOTE: an ExceptionHandler only observes exceptions escaping the chunk
        // loop — it cannot skip the failing item. Per-item skipping requires
        // faultTolerant().skip(...) on the builder instead.
        return stepBuilderFactory.get("processSnidFileStep")
                .<MyDTO, MyDTO>chunk(numOfProcessingChunksPerFile)
                .reader(snidFileReader(OVERRIDDEN_BY_EXPRESSION))
                .processor(manualUploadAsyncItemProcessor())
                .writer(manualUploadAsyncItemWriter())
                .listener(logProcessListener)
                .throttleLimit(20)
                .taskExecutor(infrastructureConfigurationConfig.taskJobExecutor())
                // Fixed: the previous unconditional cast to FlatFileParseException
                // threw ClassCastException for any other exception type. Log the
                // throwable directly and pass it as the logger's cause argument
                // so the stack trace is preserved.
                .exceptionHandler((context, throwable) ->
                        logger.error("Skipping record on file. cause=" + throwable.getCause(), throwable))
                .build();
    }

Since I am processing with chunks, when a blank line arrives and the exception is caught, what happens is that the whole chunk is skipped (the chunk might contain valid lines of the CSV file, and they are skipped as well).

Any idea how to do this right when processing a file in chunks?

Thanks, ray.

After editing my code, it is still not skipping:

public Step processSnidUploadedFileStep() {
        // Fault-tolerant variant of the step: parse failures from the reader
        // are skipped individually instead of failing the whole chunk.
        SimpleStepBuilder<MyDTO, MyDTO> builder = new SimpleStepBuilder<MyDTO, MyDTO>(stepBuilderFactory.get("processSnidFileStep"));
        return builder
                // Fixed: chunk() was parameterized <PushItemDTO, PushItemDTO>,
                // contradicting the builder's declared <MyDTO, MyDTO> types.
                .<MyDTO, MyDTO>chunk(numOfProcessingChunksPerFile)
                .faultTolerant()
                .skip(FlatFileParseException.class)
                // Fixed: skip() without an explicit limit allows only 10 skips
                // (the default) before the step fails; raise the limit so every
                // parse failure in the file is skipped.
                .skipLimit(Integer.MAX_VALUE)
                .reader(snidFileReader(OVERRIDDEN_BY_EXPRESSION))
                .processor(manualUploadAsyncItemProcessor())
                .writer(manualUploadAsyncItemWriter())
                .listener(logProcessListener)
                .throttleLimit(20)
                .taskExecutor(infrastructureConfigurationConfig.taskJobExecutor())
                .build();
    }

Solution

  • We created a custom SimpleRecordSeparatorPolicy which tells the reader to skip blank lines. That way, if we read 100 records of which 3 are blank lines, those 3 are ignored without an exception, and the remaining 97 records are written.

    Here is code:

    package com.my.package;
    
    import org.springframework.batch.item.file.separator.SimpleRecordSeparatorPolicy;
    
    /**
     * Record separator policy that silently drops blank lines so the
     * FlatFileItemReader never hands them to the LineMapper.
     */
    public class BlankLineRecordSeparatorPolicy extends SimpleRecordSeparatorPolicy {

        /** A blank line never terminates (or constitutes) a record. */
        @Override
        public boolean isEndOfRecord(final String line) {
            if (line.trim().isEmpty()) {
                return false;
            }
            return super.isEndOfRecord(line);
        }

        /** Maps null or blank records to {@code null} so the reader skips them. */
        @Override
        public String postProcess(final String record) {
            if (record != null && !record.trim().isEmpty()) {
                return super.postProcess(record);
            }
            return null;
        }

    }
    

    And here is reader:

    package com.my.package;
    
    import org.springframework.batch.core.configuration.annotation.StepScope;
    import org.springframework.batch.item.file.FlatFileItemReader;
    import org.springframework.batch.item.file.mapping.DefaultLineMapper;
    import org.springframework.batch.item.file.transform.DelimitedLineTokenizer;
    import org.springframework.stereotype.Component;
    
    /**
     * Step-scoped FlatFileItemReader pre-configured to tolerate blank
     * lines in the input via {@link BlankLineRecordSeparatorPolicy}.
     */
    @Component
    @StepScope
    public class CustomReader extends FlatFileItemReader<CustomClass> {

        @Override
        public void afterPropertiesSet() throws Exception {
            final DefaultLineMapper<CustomClass> lineMapper = new DefaultLineMapper<CustomClass>() {
                {
                    /// configuration of line mapper
                }
            };
            setLineMapper(lineMapper);
            // Blank lines are filtered out before they ever reach the mapper.
            setRecordSeparatorPolicy(new BlankLineRecordSeparatorPolicy());
            super.afterPropertiesSet();
        }
    }