搜索 FileStream 然后使用 StreamReader 从那里读取
Seek through FileStream then using StreamReader to read from there
所以我希望能够在 fileStream 中寻找一个点,然后使用 StreamReader 向前读取。然后再向前查找,使用StreamReader读取另一块数据。
const int BufferSize = 4096;
var buffer = new char[BufferSize];
var endpoints = new List<long>();
using (var fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
{
var fileLength = fileStream.Length;
var seekPositionCount = fileLength / concurrentReads;
long currentOffset = 0;
for (var i = 0; i < concurrentReads; i++)
{
var seekPosition = seekPositionCount + currentOffset;
// seek the file forward
fileStream.Seek(seekPosition, SeekOrigin.Current);
// setting true at the end is very important, keeps the underlying fileStream open.
using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize, true))
{
// this also seeks the file forward the amount in the buffer...
int bytesRead;
var totalBytesRead = 0;
while ((bytesRead = await streamReader.ReadAsync(buffer, 0, buffer.Length)) > 0)
{
totalBytesRead += bytesRead;
var found = false;
var gotR = false;
for (var j = 0; j < buffer.Length; j++)
{
if (buffer[j] == '\r')
{
gotR = true;
continue;
}
if (buffer[j] == '\n' && gotR)
{
// so we add the total bytes read, minus the current buffer amount read, then add how far into the buffer we actually read.
seekPosition += totalBytesRead - BufferSize + j;
endpoints.Add(seekPosition);
found = true;
break;
}
}
if (found) break;
}
}
// we need to seek to the position we got to in the StreamReader (but not going by how much was read).
fileStream.Seek(seekPosition, SeekOrigin.Current);
currentOffset += seekPosition;
}
}
return endpoints;
但是,我进入 endpoints
中的两个条目并退出。
(bytesRead = await streamReader.ReadAsync(buffer, 0, buffer.Length)) > 0
你传递给 ReadAsync
的参数我认为只是与缓冲区有关,所以我认为 index
参数是说,在 [=16] 处填充 buffer
=].
我无法从 Reference Source 中看出这个值是如何使用的。
我假设(并且无法找到支持的证据),当你打开 StreamReader
它使用底层 Stream
作为它的指南,所以当你要求阅读一些字节,它将从底层 Stream
所在的位置开始...
但是我所做的结果并没有显示出来,它们似乎显示 StreamReader
每次都从 Stream
的开头开始 - 但是,我也找不到支持它是如何做到的证据...
寻求
我对 seeking 的理解是否正确,如果我调用 seek
fileStream.Seek(seekPosition, SeekOrigin.Current);
如果文件在300
,我想找600
,上面的变量seekPosition
应该是600
??
ReferenceSource 另有说法:
else if (origin == SeekOrigin.Current) {
// Don't call FlushRead here, which would have caused an infinite
// loop. Simply adjust the seek origin. This isn't necessary
// if we're seeking relative to the beginning or end of the stream.
offset -= (_readLen - _readPos);
}
感谢 Hans Passant,我得到了答案:
var buffer = new char[BufferSize];
var endpoints = new List<long>();
using (var fileStream = this.CreateMultipleReadAccessFileStream(fileName))
{
var fileLength = fileStream.Length;
var seekPositionCount = fileLength / concurrentReads;
long currentOffset = 0;
for (var i = 0; i < concurrentReads; i++)
{
var seekPosition = seekPositionCount + currentOffset;
// seek the file forward
// fileStream.Seek(seekPosition, SeekOrigin.Current);
// setting true at the end is very important, keeps the underlying fileStream open.
using (var streamReader = this.CreateTemporaryStreamReader(fileStream))
{
// this is poor on performance, hence why you split the file here and read in new threads.
streamReader.DiscardBufferedData();
// you have to advance the fileStream here, because of the previous line
streamReader.BaseStream.Seek(seekPosition, SeekOrigin.Begin);
// this also seeks the file forward the amount in the buffer...
int bytesRead;
var totalBytesRead = 0;
while ((bytesRead = await streamReader.ReadAsync(buffer, 0, buffer.Length)) > 0)
{
totalBytesRead += bytesRead;
var found = false;
var gotR = false;
for (var j = 0; j < buffer.Length; j++)
{
if (buffer[j] == '\r')
{
gotR = true;
continue;
}
if (buffer[j] == '\n' && gotR)
{
// so we add the total bytes read, minus the current buffer amount read, then add how far into the buffer we actually read.
seekPosition += totalBytesRead - BufferSize + j;
endpoints.Add(seekPosition);
found = true;
break;
}
// if we have found new line then move the position to
}
if (found) break;
}
}
currentOffset = seekPosition;
}
}
return endpoints;
注意新部分,而不是这样做两次:
fileStream.Seek(seekPosition, SeekOrigin.Current);
我现在使用 SeekOrigin.Begin
并使用 StreamReader
来推进底层基础流:
// this is poor on performance, hence why you split the file here and read in new threads.
streamReader.DiscardBufferedData();
// you have to advance the fileStream here, because of the previous line
streamReader.BaseStream.Seek(seekPosition, SeekOrigin.Begin);
DiscardBufferedData
意味着我一直在使用底层流位置。
所以我希望能够在 fileStream 中寻找一个点,然后使用 StreamReader 向前读取。然后再向前查找,使用StreamReader读取另一块数据。
const int BufferSize = 4096;
var buffer = new char[BufferSize];
var endpoints = new List<long>();
using (var fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
{
var fileLength = fileStream.Length;
var seekPositionCount = fileLength / concurrentReads;
long currentOffset = 0;
for (var i = 0; i < concurrentReads; i++)
{
var seekPosition = seekPositionCount + currentOffset;
// seek the file forward
fileStream.Seek(seekPosition, SeekOrigin.Current);
// setting true at the end is very important, keeps the underlying fileStream open.
using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize, true))
{
// this also seeks the file forward the amount in the buffer...
int bytesRead;
var totalBytesRead = 0;
while ((bytesRead = await streamReader.ReadAsync(buffer, 0, buffer.Length)) > 0)
{
totalBytesRead += bytesRead;
var found = false;
var gotR = false;
for (var j = 0; j < buffer.Length; j++)
{
if (buffer[j] == '\r')
{
gotR = true;
continue;
}
if (buffer[j] == '\n' && gotR)
{
// so we add the total bytes read, minus the current buffer amount read, then add how far into the buffer we actually read.
seekPosition += totalBytesRead - BufferSize + j;
endpoints.Add(seekPosition);
found = true;
break;
}
}
if (found) break;
}
}
// we need to seek to the position we got to in the StreamReader (but not going by how much was read).
fileStream.Seek(seekPosition, SeekOrigin.Current);
currentOffset += seekPosition;
}
}
return endpoints;
但是,我进入 endpoints
中的两个条目并退出。
(bytesRead = await streamReader.ReadAsync(buffer, 0, buffer.Length)) > 0
你传递给 ReadAsync
的参数我认为只是与缓冲区有关,所以我认为 index
参数是说,在 [=16] 处填充 buffer
=].
我无法从 Reference Source 中看出这个值是如何使用的。
我假设(并且无法找到支持的证据),当你打开 StreamReader
它使用底层 Stream
作为它的指南,所以当你要求阅读一些字节,它将从底层 Stream
所在的位置开始...
但是我所做的结果并没有显示出来,它们似乎显示 StreamReader
每次都从 Stream
的开头开始 - 但是,我也找不到支持它是如何做到的证据...
寻求
我对 seeking 的理解是否正确,如果我调用 seek
fileStream.Seek(seekPosition, SeekOrigin.Current);
如果文件在300
,我想找600
,上面的变量seekPosition
应该是600
??
ReferenceSource 另有说法:
else if (origin == SeekOrigin.Current) {
// Don't call FlushRead here, which would have caused an infinite
// loop. Simply adjust the seek origin. This isn't necessary
// if we're seeking relative to the beginning or end of the stream.
offset -= (_readLen - _readPos);
}
感谢 Hans Passant,我得到了答案:
var buffer = new char[BufferSize];
var endpoints = new List<long>();
using (var fileStream = this.CreateMultipleReadAccessFileStream(fileName))
{
var fileLength = fileStream.Length;
var seekPositionCount = fileLength / concurrentReads;
long currentOffset = 0;
for (var i = 0; i < concurrentReads; i++)
{
var seekPosition = seekPositionCount + currentOffset;
// seek the file forward
// fileStream.Seek(seekPosition, SeekOrigin.Current);
// setting true at the end is very important, keeps the underlying fileStream open.
using (var streamReader = this.CreateTemporaryStreamReader(fileStream))
{
// this is poor on performance, hence why you split the file here and read in new threads.
streamReader.DiscardBufferedData();
// you have to advance the fileStream here, because of the previous line
streamReader.BaseStream.Seek(seekPosition, SeekOrigin.Begin);
// this also seeks the file forward the amount in the buffer...
int bytesRead;
var totalBytesRead = 0;
while ((bytesRead = await streamReader.ReadAsync(buffer, 0, buffer.Length)) > 0)
{
totalBytesRead += bytesRead;
var found = false;
var gotR = false;
for (var j = 0; j < buffer.Length; j++)
{
if (buffer[j] == '\r')
{
gotR = true;
continue;
}
if (buffer[j] == '\n' && gotR)
{
// so we add the total bytes read, minus the current buffer amount read, then add how far into the buffer we actually read.
seekPosition += totalBytesRead - BufferSize + j;
endpoints.Add(seekPosition);
found = true;
break;
}
// if we have found new line then move the position to
}
if (found) break;
}
}
currentOffset = seekPosition;
}
}
return endpoints;
注意新部分,而不是这样做两次:
fileStream.Seek(seekPosition, SeekOrigin.Current);
我现在使用 SeekOrigin.Begin
并使用 StreamReader
来推进底层基础流:
// this is poor on performance, hence why you split the file here and read in new threads.
streamReader.DiscardBufferedData();
// you have to advance the fileStream here, because of the previous line
streamReader.BaseStream.Seek(seekPosition, SeekOrigin.Begin);
DiscardBufferedData
意味着我一直在使用底层流位置。