Delphi- 如何在保存前删除非 ANSI(不可打印)字符?
Delphi- How to remove NON ANSI (NOT PRINTABLE) characters before saving?
有人可以指导我以某种方式扩展此过程,以便在将流保存到文件之前删除所有不可打印字符或替换为 SPACE 吗?字符串从二进制文件中读取,最大大小为 1 MB。
我的程序:
// Copies up to SizeofFiles bytes from the source file, starting at byte
// offset posi, into a new file whose name is the source name with a
// three-digit numeric extension (e.g. '.001').
// Inputs are read from the form: lblFilePath (source path), edt1 (start
// offset) and edt2 (maximum number of bytes). StrToInt raises EConvertError
// if either edit box does not contain a valid integer.
var
  i : Word;
  FileName : TFileName;
  SizeofFiles, posi : Integer;
  Remaining : Int64;
  fs, sStream : TFileStream;
  SplitFileName : String;
begin
  ProgressBar1.Position := 0;
  FileName := lblFilePath.Caption;
  SizeofFiles := StrToInt(edt2.Text);
  posi := StrToInt(edt1.Text);
  // i was never assigned in the original, so the generated extension was
  // undefined. NOTE(review): 1 is assumed to be the intended part number.
  i := 1;
  fs := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    fs.Position := posi;
    // Clamp the request to what is actually left in the source file.
    Remaining := fs.Size - fs.Position;
    if Remaining < SizeofFiles then
      SizeofFiles := Remaining;
    // TStream.CopyFrom treats Count = 0 as "rewind and copy the whole
    // stream", so only copy (and only create the output file) when there is
    // at least one byte to transfer. This also guarantees fs.Size > 0 for
    // the progress calculation below.
    if SizeofFiles > 0 then
    begin
      SplitFileName := ChangeFileExt(FileName, '.' + FormatFloat('000', i));
      sStream := TFileStream.Create(SplitFileName, fmCreate or fmShareExclusive);
      try
        sStream.CopyFrom(fs, SizeofFiles);
        ProgressBar1.Position := Round((fs.Position / fs.Size) * 100);
      finally
        sStream.Free;
      end;
    end;
  finally
    fs.Free;
  end;
end;
您将无法再使用 TStream.CopyFrom()。您必须先用 Read() 或 ReadBuffer() 将源 TStream 中的数据读入本地字节数组,从该数组中删除(或替换)您不想要的内容,然后用 Write() 或 WriteBuffer() 将剩余字节写入目标 TStream。
这是一个简单的演示,可以满足您的需求:
const
  SrcFileName   : String = 'Test.txt';        // file to read
  DstFileName   : String = 'TestResult.txt';  // file to create/overwrite
  StartPosition : Int64  = 50;                // byte offset where reading starts

// Reads SrcFileName from StartPosition to the end of the file, replaces every
// byte outside the printable ASCII range (32..126) with a space, and writes
// the result to DstFileName.
// Note that CR, LF and TAB are below 32 and are therefore replaced as well.
// If the source file is not longer than StartPosition, an empty destination
// file is written.
// Raises EReadError / EWriteError if the data cannot be read or written
// completely, and EFOpenError / EFCreateError if a file cannot be opened.
procedure TForm1.Button1Click(Sender: TObject);
var
  FS : TFileStream;
  Buf : TBytes;
  Remaining : Int64;
  I : Integer;
begin
  // Read the source file from the starting position
  FS := TFileStream.Create(SrcFileName, fmOpenRead or fmShareDenyWrite);
  try
    Remaining := FS.Size - StartPosition;
    // Guard against a negative length for SetLength and against indexing
    // Buf[0] on an empty array when the file is shorter than StartPosition.
    if Remaining > 0 then
    begin
      FS.Position := StartPosition;
      SetLength(Buf, Remaining);
      // ReadBuffer raises on a short read; plain Read would silently leave
      // the tail of the buffer unfilled.
      FS.ReadBuffer(Buf[0], Length(Buf));
    end;
  finally
    FreeAndNil(FS);
  end;
  // Replace every non-printable byte with a space.
  // Assumes the file content is single-byte ASCII; adapt the test for other
  // encodings.
  for I := 0 to High(Buf) do
    if (Buf[I] < Ord(' ')) or (Buf[I] > 126) then
      Buf[I] := Ord(' ');
  // Write the destination file
  FS := TFileStream.Create(DstFileName, fmCreate);
  try
    if Length(Buf) > 0 then
      FS.WriteBuffer(Buf[0], Length(Buf));
  finally
    FreeAndNil(FS);
  end;
end;
此代码假定文件是纯 ASCII 文本,并且 ASCII 码低于 32(空格)或高于 126 的每个字符均不可打印。对于欧洲语言来说情况可能并非如此(例如带重音符号的字母,其编码大于 126),您可以轻松调整判断条件以满足您的需求。
源文件也可能是 Unicode(16 位字符)。在这种情况下,您应该使用由 Unicode 字符或 16 位整数 (Word) 组成的缓冲区,并相应调整可打印字符的判断条件。
也可能是 UTF-8……
有人可以指导我以某种方式扩展此过程,以便在将流保存到文件之前删除所有不可打印字符或替换为 SPACE 吗?字符串从二进制文件中读取,最大大小为 1 MB。 我的程序:
// Copies up to SizeofFiles bytes from the source file, starting at byte
// offset posi, into a new file whose name is the source name with a
// three-digit numeric extension (e.g. '.001').
// Inputs are read from the form: lblFilePath (source path), edt1 (start
// offset) and edt2 (maximum number of bytes). StrToInt raises EConvertError
// if either edit box does not contain a valid integer.
var
  i : Word;
  FileName : TFileName;
  SizeofFiles, posi : Integer;
  Remaining : Int64;
  fs, sStream : TFileStream;
  SplitFileName : String;
begin
  ProgressBar1.Position := 0;
  FileName := lblFilePath.Caption;
  SizeofFiles := StrToInt(edt2.Text);
  posi := StrToInt(edt1.Text);
  // i was never assigned in the original, so the generated extension was
  // undefined. NOTE(review): 1 is assumed to be the intended part number.
  i := 1;
  fs := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    fs.Position := posi;
    // Clamp the request to what is actually left in the source file.
    Remaining := fs.Size - fs.Position;
    if Remaining < SizeofFiles then
      SizeofFiles := Remaining;
    // TStream.CopyFrom treats Count = 0 as "rewind and copy the whole
    // stream", so only copy (and only create the output file) when there is
    // at least one byte to transfer. This also guarantees fs.Size > 0 for
    // the progress calculation below.
    if SizeofFiles > 0 then
    begin
      SplitFileName := ChangeFileExt(FileName, '.' + FormatFloat('000', i));
      sStream := TFileStream.Create(SplitFileName, fmCreate or fmShareExclusive);
      try
        sStream.CopyFrom(fs, SizeofFiles);
        ProgressBar1.Position := Round((fs.Position / fs.Size) * 100);
      finally
        sStream.Free;
      end;
    end;
  finally
    fs.Free;
  end;
end;
您将无法再使用 TStream.CopyFrom()。您必须先用 Read() 或 ReadBuffer() 将源 TStream 中的数据读入本地字节数组,从该数组中删除(或替换)您不想要的内容,然后用 Write() 或 WriteBuffer() 将剩余字节写入目标 TStream。
这是一个简单的演示,可以满足您的需求:
const
  SrcFileName   : String = 'Test.txt';        // file to read
  DstFileName   : String = 'TestResult.txt';  // file to create/overwrite
  StartPosition : Int64  = 50;                // byte offset where reading starts

// Reads SrcFileName from StartPosition to the end of the file, replaces every
// byte outside the printable ASCII range (32..126) with a space, and writes
// the result to DstFileName.
// Note that CR, LF and TAB are below 32 and are therefore replaced as well.
// If the source file is not longer than StartPosition, an empty destination
// file is written.
// Raises EReadError / EWriteError if the data cannot be read or written
// completely, and EFOpenError / EFCreateError if a file cannot be opened.
procedure TForm1.Button1Click(Sender: TObject);
var
  FS : TFileStream;
  Buf : TBytes;
  Remaining : Int64;
  I : Integer;
begin
  // Read the source file from the starting position
  FS := TFileStream.Create(SrcFileName, fmOpenRead or fmShareDenyWrite);
  try
    Remaining := FS.Size - StartPosition;
    // Guard against a negative length for SetLength and against indexing
    // Buf[0] on an empty array when the file is shorter than StartPosition.
    if Remaining > 0 then
    begin
      FS.Position := StartPosition;
      SetLength(Buf, Remaining);
      // ReadBuffer raises on a short read; plain Read would silently leave
      // the tail of the buffer unfilled.
      FS.ReadBuffer(Buf[0], Length(Buf));
    end;
  finally
    FreeAndNil(FS);
  end;
  // Replace every non-printable byte with a space.
  // Assumes the file content is single-byte ASCII; adapt the test for other
  // encodings.
  for I := 0 to High(Buf) do
    if (Buf[I] < Ord(' ')) or (Buf[I] > 126) then
      Buf[I] := Ord(' ');
  // Write the destination file
  FS := TFileStream.Create(DstFileName, fmCreate);
  try
    if Length(Buf) > 0 then
      FS.WriteBuffer(Buf[0], Length(Buf));
  finally
    FreeAndNil(FS);
  end;
end;
此代码假定文件是纯 ASCII 文本,并且 ASCII 码低于 32(空格)或高于 126 的每个字符均不可打印。对于欧洲语言来说情况可能并非如此(例如带重音符号的字母,其编码大于 126),您可以轻松调整判断条件以满足您的需求。
源文件也可能是 Unicode(16 位字符)。在这种情况下,您应该使用由 Unicode 字符或 16 位整数 (Word) 组成的缓冲区,并相应调整可打印字符的判断条件。
也可能是 UTF-8……