Splitting file stream at second delimiter
I'm currently trying to stream a file that looks something like this:
Header
Title
Header
Title
...
The way I'm processing it is with a stream like this:
fs.createReadStream(filePath, streamOpts)
  .pipe(split())
  .on('data', parseLine);
However, this splits the file at every line, so data is just a single string containing the next line. For example,
const parseLine = (line) => { console.log(line); };
> "Header"
What I want is to split it so that data somehow becomes an object, e.g.
> "{ header: 'Header', title: 'Title' }"
Is there a way to do this? I think I need to consume two lines instead of one during the pipe, but I don't know how.
My current approach looks like this:
const fs = require('fs');
const split = require('split');

let isHeaderLine = true;
let currentItem = {};
const items = [];

// Read one line from the stream, alternating between header and title lines
function parseFileLine(line) {
  if (isHeaderLine) {
    currentItem.header = line;
  } else {
    currentItem.title = line;
    items.push(currentItem);
    currentItem = {};
  }
  isHeaderLine = !isHeaderLine;
}

// Read the file as a stream
// Break it into processable lines
function parseFileLines(filePath) {
  const streamOpts = {
    encoding: 'utf8',
  };
  fs.createReadStream(filePath, streamOpts)
    .pipe(split())
    .on('data', parseFileLine);
}
But I don't feel this is the best approach. Is there a better way to split off and pass along two lines at a time, preferably as an object or as an array?
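One option worth sketching here (this is only an illustration, not the approach ultimately taken below; the pairLines name and the file.txt path are placeholders) is to keep split() for the line handling and pair consecutive lines inside a small object-mode Transform:

const fs = require('fs');
const split = require('split');
const { Transform } = require('stream');

// Pairs consecutive lines coming out of split() into { header, title } objects.
// Assumes the input strictly alternates header and title lines.
const pairLines = new Transform({
  writableObjectMode: true, // accept the strings emitted by split()
  readableObjectMode: true, // emit plain objects downstream
  transform(line, encoding, callback) {
    if (line === '') return callback(); // skip the trailing blank line split() emits
    if (this.pendingHeader === undefined) {
      this.pendingHeader = line; // first line of a pair
      callback();
    } else {
      const entry = { header: this.pendingHeader, title: line };
      this.pendingHeader = undefined;
      callback(null, entry); // emit the completed pair
    }
  },
});

fs.createReadStream('file.txt', { encoding: 'utf8' })
  .pipe(split())
  .pipe(pairLines)
  .on('data', (entry) => console.log(entry));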
Eventually solved this using a Transform stream:
const fs = require('fs');
const { Transform } = require('stream');

// Split a buffer (or string) once, at the first occurrence of splitBuffer.
function bufferSplitOnce(buffer, splitBuffer) {
  const splitIdx = buffer.indexOf(splitBuffer);
  return (splitIdx === -1)
    ? [buffer]
    : [buffer.slice(0, splitIdx), buffer.slice(splitIdx + 1)];
}

// Split a buffer on the first n occurrences of splitBuffer,
// leaving the remainder as the last element.
function bufferSplitN(buffer, splitBuffer, n) {
  const result = [buffer];
  for (let i = 0; i < n; i += 1) {
    result.push(...bufferSplitOnce(result.pop(), splitBuffer));
  }
  return result;
}

const streamToEntry = new Transform({
  transform(chunk, encoding, callback) {
    let chunkTail = chunk;
    while (chunkTail) {
      // Take the first two lines off the chunk and keep the rest for the next pass.
      const splitChunk = bufferSplitN(chunkTail, '\n', 2);
      this.push({
        code: splitChunk[0],
        value: splitChunk[1],
      });
      // eslint-disable-next-line prefer-destructuring
      chunkTail = splitChunk[2];
    }
    callback();
  },
  readableObjectMode: true,
  writableObjectMode: true,
});

fs.createReadStream(filePath, streamOpts)
  .pipe(streamToEntry)
  .on('data', (chunk) => {
    console.log(chunk);
  });
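A caveat worth noting: fs.createReadStream hands the Transform chunks at arbitrary boundaries, so a Header/Title pair can be cut in half between two transform() calls. A minimal sketch of one way to guard against that (not part of the original answer; streamToEntrySafe and the carry property are made-up names) carries the incomplete tail over to the next call and flushes it at the end:

const { Transform } = require('stream');

// Variant of streamToEntry that tolerates Header/Title pairs straddling chunks.
const streamToEntrySafe = new Transform({
  readableObjectMode: true,
  transform(chunk, encoding, callback) {
    // Prepend whatever was left over from the previous chunk.
    let text = (this.carry || '') + chunk.toString();
    let first;
    // Only emit entries for complete "header\ntitle\n" pairs.
    while ((first = text.indexOf('\n')) !== -1) {
      const second = text.indexOf('\n', first + 1);
      if (second === -1) break; // the title line is not complete yet
      this.push({ code: text.slice(0, first), value: text.slice(first + 1, second) });
      text = text.slice(second + 1);
    }
    this.carry = text; // keep the incomplete remainder for the next chunk
    callback();
  },
  flush(callback) {
    // Emit a final entry if the file did not end with a newline.
    if (this.carry) {
      const [code, value = ''] = this.carry.split('\n');
      this.push({ code, value });
    }
    callback();
  },
});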