I am using opencsv parser. In my csv file there can be about 100,000 rows. It takes too much time to read them. How can I skip and read every Nth row (each 30th row for example)?
/**
 * Parses `measurements.csv` from [folder] (resolved under the app's external files directory)
 * into a list of [RideDataCsv] beans, doing the blocking file work on [Dispatchers.IO].
 *
 * @param folder name of the sub-folder that contains `measurements.csv`.
 * @return the parsed rows, or `null` when the directory is unavailable or reading/parsing fails
 *         (the failure is printed together with its stack trace).
 */
private suspend fun readCSVRides(folder: String): MutableList<RideDataCsv>? = withContext(Dispatchers.IO) {
    var fileReader: BufferedReader? = null
    val segments: MutableList<RideDataCsv>? = try {
        // Resolved inside the try so an unavailable storage directory is reported through the
        // same "return null" path as every other read failure instead of crashing the caller.
        val baseDir = checkNotNull(getExternalFilesDir(null)) { "External files directory is not available" }
        val path = "${baseDir.path}/$folder/measurements.csv"

        fileReader = BufferedReader(FileReader(path))
        CsvToBeanBuilder<RideDataCsv>(fileReader)
            .withType(RideDataCsv::class.java)
            .withIgnoreLeadingWhiteSpace(true)
            .withIgnoreEmptyLine(true)
            .build()
            .parse()
    } catch (e: Exception) {
        println("Reading CSV Error!")
        e.printStackTrace()
        null
    } finally {
        try {
            // Safe call: the reader is still null when opening the file failed, and a `!!` here
            // would throw a NullPointerException that masks the original error.
            fileReader?.close()
        } catch (e: IOException) {
            println("Closing fileReader/csvParser Error!")
            e.printStackTrace()
        }
    }
    segments
}
You can implement a Reader that keeps only every nth line of the input (skipping the lines in between) and wrap your fileReader like so:
fileReader = SkipReader(BufferedReader(FileReader(path)), skipEvery = 30)
Below is an example of how it could be implemented:
import java.io.BufferedReader
import java.io.StringReader
import java.io.Reader
/**
 * A [Reader] that passes through only every [skipEvery]-th line of [input]: the first line is
 * kept, the following `skipEvery - 1` lines are dropped, the next line is kept, and so on.
 * With `skipEvery = 1` the input is passed through unchanged.
 *
 * Lines are delimited by `'\n'`. Characters are pulled from [input] one at a time, so wrap
 * unbuffered sources in a `BufferedReader` before passing them in.
 *
 * @param input the underlying reader; it is closed when this reader is closed.
 * @param skipEvery distance between two kept lines; must be at least 1.
 * @throws IllegalArgumentException if [skipEvery] is less than 1.
 */
class SkipReader(
    private val input: Reader,
    private val skipEvery: Int
) : Reader(input) {

    init {
        // A non-positive step would never reach a "kept" line again after the first one.
        require(skipEvery >= 1) { "skipEvery must be at least 1, but was $skipEvery" }
    }

    // Position of the current line inside its group of `skipEvery` lines; 0 marks the kept line.
    private var lineInGroup = 0

    /**
     * Fills [cbuf] with up to [len] characters taken from the kept lines only.
     *
     * @return the number of characters stored, `0` when [len] is 0, or `-1` once the underlying
     *         reader is exhausted and nothing could be stored.
     * @throws IndexOutOfBoundsException if [offset]/[len] do not describe a valid range of [cbuf].
     */
    override fun read(cbuf: CharArray, offset: Int, len: Int): Int {
        if (offset < 0 || len < 0 || len > cbuf.size - offset) {
            throw IndexOutOfBoundsException("offset=$offset, len=$len, size=${cbuf.size}")
        }
        // The Reader contract asks for 0, not end-of-stream, when no characters are requested.
        if (len == 0) return 0

        var charsRead = 0
        while (charsRead < len) {
            val value = input.read()
            if (value == -1) break

            val c = value.toChar()
            if (c == '\n') {
                // The newline belongs to the line it introduces: it is emitted only when the
                // next line is a kept one, so kept lines stay separated by exactly one '\n'.
                lineInGroup = (lineInGroup + 1) % skipEvery
            }
            if (lineInGroup == 0) {
                cbuf[offset + charsRead] = c
                charsRead++
            }
        }
        return if (charsRead > 0) charsRead else -1
    }

    /** Closes the underlying [input] reader. */
    override fun close() {
        input.close()
    }
}
It may be a good idea to improve performance by overriding other methods as well, not only read and close (these two are simply the only ones that must be overridden). Reading several characters at once instead of one by one via input.read() may also help, although this may already be handled well by BufferedReader.
Testing on a few lines:
/**
 * Small demo: feeds seven numbered lines through a [SkipReader] with `skipEvery = 2`
 * and prints each line that comes out.
 */
fun main() {
    val sampleLines = """
        Line 1
        Line 2
        Line 3
        Line 4
        Line 5
        Line 6
        Line 7
    """.trimIndent()

    val source = BufferedReader(StringReader(sampleLines))
    SkipReader(source, skipEvery = 2)
        .readLines()
        .forEach { keptLine -> println(keptLine) }
}
Result:
Line 1
Line 3
Line 5
Line 7