跳过 CSV 解析器中的每第 N 行
Skip every Nth row in CSV parser
我正在使用 opencsv 解析器。我的 CSV 文件大约有 100,000 行,全部读取耗时太长。怎样才能跳过中间的行,只读取每第 N 行(例如每第 30 行)?
/**
 * Parses `<external files dir>/<folder>/measurements.csv` into [RideDataCsv] beans on [Dispatchers.IO].
 *
 * Failures are reported on standard output and never propagate to the caller: if the external
 * files directory is unavailable, or the file cannot be opened or parsed, the result is `null`.
 *
 * @param folder name of the sub-directory (below the external files directory) holding the CSV file.
 * @return the parsed rows, or `null` when reading failed.
 */
private suspend fun readCSVRides(folder: String): MutableList<RideDataCsv>? = withContext(Dispatchers.IO) {
    var fileReader: BufferedReader? = null
    try {
        // Resolved inside the try so that a missing directory is reported like any other read failure.
        val baseDir = checkNotNull(getExternalFilesDir(null)) { "External files directory is unavailable" }
        val path = "${baseDir.path}/$folder/measurements.csv"
        fileReader = BufferedReader(FileReader(path))
        CsvToBeanBuilder<RideDataCsv>(fileReader)
            .withType(RideDataCsv::class.java)
            .withIgnoreLeadingWhiteSpace(true)
            .withIgnoreEmptyLine(true)
            .build()
            .parse()
    } catch (e: Exception) {
        println("Reading CSV Error!")
        e.printStackTrace()
        null
    } finally {
        try {
            // Safe call: the reader is still null when opening the file failed.
            fileReader?.close()
        } catch (e: IOException) {
            println("Closing fileReader/csvParser Error!")
            e.printStackTrace()
        }
    }
}
您可以实现一个 Reader,让它只保留输入中的每第 n 行、跳过其余的行,然后像这样用它包装您的 fileReader:
// Keep only every 30th line before the CSV parser sees the data. The outer BufferedReader keeps the
// expression assignable to a variable declared as BufferedReader?; the inner one buffers the file
// because SkipReader pulls its input one character at a time.
fileReader = BufferedReader(SkipReader(BufferedReader(FileReader(path)), skipEvery = 30))
下面是一个实现示例:
import java.io.BufferedReader
import java.io.StringReader
import java.io.Reader
/**
 * A [Reader] decorator that lets through only every [skipEvery]-th line of [input]
 * (line 1, line 1 + skipEvery, line 1 + 2 * skipEvery, ...) and drops the lines in between.
 *
 * Lines are delimited by `'\n'`. When lines are being dropped, the newline that ends a kept line is
 * swallowed and the newline that ends the last dropped line of the group is emitted instead, so
 * consecutive kept lines stay separated by exactly one `'\n'`. The reader knows nothing about CSV
 * quoting: a line break inside a quoted field is treated as a line boundary.
 *
 * @param input source of characters; it is read one character at a time, so pass a buffered reader.
 * @param skipEvery distance between kept lines; `1` keeps every line.
 * @throws IllegalArgumentException if [skipEvery] is less than 1.
 */
class SkipReader(
    private val input: Reader,
    private val skipEvery: Int
) : Reader(input) {
    init {
        // With a non-positive step the counter below would never reach zero again after line 1.
        require(skipEvery >= 1) { "skipEvery must be at least 1, was $skipEvery" }
    }

    // Number of lines still to drop before the next line is kept.
    private var linesToSkip = skipEvery - 1

    // True while the characters being read belong to a dropped line.
    private var isSkipping = false

    /**
     * Fills [cbuf] with up to [len] characters of the kept lines, starting at [offset].
     *
     * @return the number of characters stored, `0` when [len] is zero, or `-1` when the input is
     * exhausted and nothing was stored.
     */
    override fun read(cbuf: CharArray, offset: Int, len: Int): Int {
        // A zero-length request must not be reported as end of stream.
        if (len == 0) return 0

        var charsRead = 0
        while (charsRead < len) {
            val value = input.read()
            if (value == -1) break

            val c = value.toChar()
            if (c == '\n') {
                if (linesToSkip == 0) {
                    // The group of dropped lines is over: the next line is kept and this newline is emitted.
                    linesToSkip = skipEvery - 1
                    isSkipping = false
                } else {
                    // Start (or continue) dropping; this newline is swallowed as well.
                    isSkipping = true
                    linesToSkip--
                }
            }
            if (!isSkipping) {
                cbuf[offset + charsRead] = c
                charsRead++
            }
        }
        return if (charsRead > 0) charsRead else -1
    }

    /** Closes the wrapped [input]. */
    override fun close() {
        input.close()
    }
}
read 和 close 只是必须覆盖的两个方法;为了提高性能,除了它们之外再覆盖其他方法可能是个好主意。一次读取多个字符,而不是通过 input.read() 逐个读取,也可能有帮助,不过 BufferedReader 已经能很好地处理这一点。
几行测试:
/**
 * Demo: pushes seven sample lines through [SkipReader] with `skipEvery = 2`
 * and prints every line that comes out the other end.
 */
fun main() {
    val sample = """
        Line 1
        Line 2
        Line 3
        Line 4
        Line 5
        Line 6
        Line 7
    """.trimIndent()

    SkipReader(BufferedReader(StringReader(sample)), skipEvery = 2)
        .readLines()
        .forEach { keptLine -> println(keptLine) }
}
结果:
Line 1
Line 3
Line 5
Line 7
我正在使用 opencsv 解析器。我的 CSV 文件大约有 100,000 行,全部读取耗时太长。怎样才能跳过中间的行,只读取每第 N 行(例如每第 30 行)?
/**
 * Parses `<external files dir>/<folder>/measurements.csv` into [RideDataCsv] beans on [Dispatchers.IO].
 *
 * Failures are reported on standard output and never propagate to the caller: if the external
 * files directory is unavailable, or the file cannot be opened or parsed, the result is `null`.
 *
 * @param folder name of the sub-directory (below the external files directory) holding the CSV file.
 * @return the parsed rows, or `null` when reading failed.
 */
private suspend fun readCSVRides(folder: String): MutableList<RideDataCsv>? = withContext(Dispatchers.IO) {
    var fileReader: BufferedReader? = null
    try {
        // Resolved inside the try so that a missing directory is reported like any other read failure.
        val baseDir = checkNotNull(getExternalFilesDir(null)) { "External files directory is unavailable" }
        val path = "${baseDir.path}/$folder/measurements.csv"
        fileReader = BufferedReader(FileReader(path))
        CsvToBeanBuilder<RideDataCsv>(fileReader)
            .withType(RideDataCsv::class.java)
            .withIgnoreLeadingWhiteSpace(true)
            .withIgnoreEmptyLine(true)
            .build()
            .parse()
    } catch (e: Exception) {
        println("Reading CSV Error!")
        e.printStackTrace()
        null
    } finally {
        try {
            // Safe call: the reader is still null when opening the file failed.
            fileReader?.close()
        } catch (e: IOException) {
            println("Closing fileReader/csvParser Error!")
            e.printStackTrace()
        }
    }
}
您可以实现一个 Reader,让它只保留输入中的每第 n 行、跳过其余的行,然后像这样用它包装您的 fileReader:
// Keep only every 30th line before the CSV parser sees the data. The outer BufferedReader keeps the
// expression assignable to a variable declared as BufferedReader?; the inner one buffers the file
// because SkipReader pulls its input one character at a time.
fileReader = BufferedReader(SkipReader(BufferedReader(FileReader(path)), skipEvery = 30))
下面是一个实现示例:
import java.io.BufferedReader
import java.io.StringReader
import java.io.Reader
/**
 * A [Reader] decorator that lets through only every [skipEvery]-th line of [input]
 * (line 1, line 1 + skipEvery, line 1 + 2 * skipEvery, ...) and drops the lines in between.
 *
 * Lines are delimited by `'\n'`. When lines are being dropped, the newline that ends a kept line is
 * swallowed and the newline that ends the last dropped line of the group is emitted instead, so
 * consecutive kept lines stay separated by exactly one `'\n'`. The reader knows nothing about CSV
 * quoting: a line break inside a quoted field is treated as a line boundary.
 *
 * @param input source of characters; it is read one character at a time, so pass a buffered reader.
 * @param skipEvery distance between kept lines; `1` keeps every line.
 * @throws IllegalArgumentException if [skipEvery] is less than 1.
 */
class SkipReader(
    private val input: Reader,
    private val skipEvery: Int
) : Reader(input) {
    init {
        // With a non-positive step the counter below would never reach zero again after line 1.
        require(skipEvery >= 1) { "skipEvery must be at least 1, was $skipEvery" }
    }

    // Number of lines still to drop before the next line is kept.
    private var linesToSkip = skipEvery - 1

    // True while the characters being read belong to a dropped line.
    private var isSkipping = false

    /**
     * Fills [cbuf] with up to [len] characters of the kept lines, starting at [offset].
     *
     * @return the number of characters stored, `0` when [len] is zero, or `-1` when the input is
     * exhausted and nothing was stored.
     */
    override fun read(cbuf: CharArray, offset: Int, len: Int): Int {
        // A zero-length request must not be reported as end of stream.
        if (len == 0) return 0

        var charsRead = 0
        while (charsRead < len) {
            val value = input.read()
            if (value == -1) break

            val c = value.toChar()
            if (c == '\n') {
                if (linesToSkip == 0) {
                    // The group of dropped lines is over: the next line is kept and this newline is emitted.
                    linesToSkip = skipEvery - 1
                    isSkipping = false
                } else {
                    // Start (or continue) dropping; this newline is swallowed as well.
                    isSkipping = true
                    linesToSkip--
                }
            }
            if (!isSkipping) {
                cbuf[offset + charsRead] = c
                charsRead++
            }
        }
        return if (charsRead > 0) charsRead else -1
    }

    /** Closes the wrapped [input]. */
    override fun close() {
        input.close()
    }
}
read 和 close 只是必须覆盖的两个方法;为了提高性能,除了它们之外再覆盖其他方法可能是个好主意。一次读取多个字符,而不是通过 input.read() 逐个读取,也可能有帮助,不过 BufferedReader 已经能很好地处理这一点。
几行测试:
/**
 * Demo: pushes seven sample lines through [SkipReader] with `skipEvery = 2`
 * and prints every line that comes out the other end.
 */
fun main() {
    val sample = """
        Line 1
        Line 2
        Line 3
        Line 4
        Line 5
        Line 6
        Line 7
    """.trimIndent()

    SkipReader(BufferedReader(StringReader(sample)), skipEvery = 2)
        .readLines()
        .forEach { keptLine -> println(keptLine) }
}
结果:
Line 1
Line 3
Line 5
Line 7