按频率排序 Delphi TStringList
Sort Delphi TStringList by frequency
我有一个 csv 值的 TStringList;目前每个字符串都有四个值,v0、v1、v2、v3,其中任何一个都可以重复。我想为每个字符串添加第 5 个值 (v4),这将是 v3 的计数。然后该列表应按 v4、v3 降序排序。
例如,这个数据:
1, 2, 3, 4
1, 3, 4, 4
2, 2, 1, 2
1, 2, 3, 4
3, 1, 2, 2
3, 1, 2, 1
应该返回以下列表:
1, 2, 3, 4, 3
1, 3, 4, 4, 3
1, 2, 3, 4, 3
2, 2, 1, 2, 2
3, 1, 2, 2, 2
3, 1, 2, 1, 1
其中每行中的第 5 个元素是第 4 个元素的频率,并按该元素降序排列。
这不是作业问题,
我目前正在使用 COM 将列表保存到 Excel,因此得到一个包含 4 列的工作表。然后我在第 5 列中插入公式 =COUNTIF(D:D, D1),用来计算 v3 的出现次数(即 v4)。接着,我按第 5 列降序对工作表进行排序,再将工作表重新导入到字符串列表中。这行得通,但我知道并非所有用户都安装了 Excel,所以我希望有一个更简单的解决方案。
一个简单的方法可能如下所示:
uses
Classes,
StrUtils;
// Comparison callback for TStringList.CustomSort.
// Each line is expected to hold five comma-separated integer fields; the
// fifth (index 4) is the frequency and the fourth (index 3) is the value.
// Orders lines by frequency descending, then by value descending.
// Returns < 0 when line Index1 must come before line Index2, > 0 when it
// must come after, and 0 when both keys are equal.
// Raises EConvertError (from StrToInt) if a key field is not an integer.
// Requires SysUtils (StrToInt, Trim) and Types/StrUtils (TStringDynArray,
// SplitString) in the uses clause.
function SortValues(List: TStringList; Index1, Index2: Integer): Integer;
var
  Left, Right: TStringDynArray;
begin
  Left := SplitString(List[Index1], ',');
  Right := SplitString(List[Index2], ',');
  // The fields are strings (possibly padded with blanks after the comma),
  // so they must be trimmed and converted before they can be subtracted.
  Result := StrToInt(Trim(Right[4])) - StrToInt(Trim(Left[4]));
  if Result = 0 then
    Result := StrToInt(Trim(Right[3])) - StrToInt(Trim(Left[3]));
end;
// Appends to every CSV line the number of lines that share its fourth
// field, then sorts the list with SortValues.
// Requires SysUtils (Exception, IntToStr, Trim) in the uses clause.
var
  CSV: TStringList;
  I, J: Integer;
  Values: TStringDynArray;
  Keys: TStringDynArray;
  Frequencies: array of Integer;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    // Pass 1: validate every line and cache its fourth field, so that each
    // line is parsed only once and before any line has been modified.
    SetLength(Keys, CSV.Count);
    for I := 0 to CSV.Count-1 do
    begin
      Values := SplitString(CSV[I], ',');
      if Length(Values) <> 4 then
        raise Exception.Create('Bad Input!');
      Keys[I] := Trim(Values[3]);
    end;

    // Pass 2: count how many lines (including the line itself) share the key.
    SetLength(Frequencies, Length(Keys));
    for I := 0 to High(Keys) do
    begin
      Frequencies[I] := 0;
      for J := 0 to High(Keys) do
      begin
        if Keys[J] = Keys[I] then
          Inc(Frequencies[I]);
      end;
    end;

    // Pass 3: only now append the fifth field. Appending inside the counting
    // loop would make already-processed lines fail the 4-field check.
    for I := 0 to CSV.Count-1 do
      CSV[I] := CSV[I] + ', ' + IntToStr(Frequencies[I]);

    CSV.CustomSort(@SortValues);
    // use CSV as needed...
  finally
    CSV.Free;
  end;
end;
但是,这有很多开销,一遍又一遍地解析和重新解析 CSV 字符串。这可以通过减少解析 CSV 字符串的次数并缓存结果来更好地优化,例如:
uses
Classes,
StrUtils;
type
// Pointer to a CSVInfo record.
PCSVInfo = ^CSVInfo;
// Cached, pre-parsed data for one CSV line.
CSVInfo = record
// Text of the CSV line.
Line: string;
// Integer sort key of the line.
Value: Integer;
// Occurrence count associated with Value.
Frequency: Integer;
end;
// Comparison callback for TStringList.CustomSort.
// Reads the PCSVInfo stored in Objects[] of both entries and orders them by
// Frequency descending, falling back to Value descending on a tie.
function SortValues(List: TStringList; Index1, Index2: Integer): Integer;
var
  A, B: PCSVInfo;
begin
  A := PCSVInfo(List.Objects[Index1]);
  B := PCSVInfo(List.Objects[Index2]);
  if B^.Frequency <> A^.Frequency then
    Result := B^.Frequency - A^.Frequency
  else
    Result := B^.Value - A^.Value;
end;
// Parses every CSV line once into InfoArr, counts how often each fourth
// field occurs, appends that count to the line and sorts the list with
// SortValues, which reads the cached records through CSV.Objects[].
// Requires SysUtils (Exception, IntToStr, StrToInt, Trim) in the uses clause.
var
  CSV: TStringList;
  I, J: Integer;
  Values: TStringDynArray;
  InfoArr: array of CSVInfo;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    SetLength(InfoArr, CSV.Count);
    for I := 0 to CSV.Count-1 do
    begin
      Values := SplitString(CSV[I], ',');
      if Length(Values) <> 4 then
        raise Exception.Create('Bad Input!');
      InfoArr[I].Line := CSV[I];
      // Values[3] is a string (possibly with a leading blank); convert it
      // explicitly because Value is an Integer.
      InfoArr[I].Value := StrToInt(Trim(Values[3]));
      InfoArr[I].Frequency := 0;
    end;

    for I := 0 to CSV.Count-1 do
    begin
      // Start at 1 to count the line itself, then add every other match.
      InfoArr[I].Frequency := 1;
      for J := 0 to CSV.Count-1 do
      begin
        if (J <> I) and (InfoArr[J].Value = InfoArr[I].Value) then
          Inc(InfoArr[I].Frequency);
      end;
      CSV[I] := CSV[I] + ', ' + IntToStr(InfoArr[I].Frequency);
      // InfoArr is not resized after this point, so the element address
      // stays valid while the list is sorted and used.
      CSV.Objects[I] := TObject(@InfoArr[I]);
    end;

    CSV.CustomSort(@SortValues);
    // use CSV as needed...
  finally
    CSV.Free;
  end;
end;
或者,您可以在解析 CSV 字符串时计算频率并将它们存储在 TDictionary 中,然后使用 TList<T> 对详细信息进行排序,例如:
uses
System.Classes,
System.Generics.Defaults,
System.Generics.Collections,
System.StrUtils;
type
// Cached, pre-parsed data for one CSV line.
CSVInfo = record
// Text of the CSV line.
Line: string;
// Integer sort key of the line.
Value: Integer;
// Occurrence count associated with Value.
Frequency: Integer;
end;
// Parses every CSV line once, tallies how often each fourth field occurs in
// a dictionary, sorts the parsed records by frequency (descending) and then
// by value (descending), and writes the lines back with the frequency
// appended as a fifth field.
// Requires System.SysUtils (Exception, IntToStr, StrToInt, Trim) in the
// uses clause.
var
  CSV: TStringList;
  I, Frequency: Integer;
  Values: TStringDynArray;
  Info: CSVInfo;
  InfoList: TList<CSVInfo>;
  Frequencies: TDictionary<Integer, Integer>;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    InfoList := TList<CSVInfo>.Create;
    try
      InfoList.Count := CSV.Count;

      Frequencies := TDictionary<Integer, Integer>.Create;
      try
        for I := 0 to CSV.Count-1 do
        begin
          Values := SplitString(CSV[I], ',');
          if Length(Values) <> 4 then
            raise Exception.Create('Bad Input!');
          Info.Line := CSV[I];
          // Values[3] is a string (possibly with a leading blank); convert
          // it explicitly because Value is an Integer.
          Info.Value := StrToInt(Trim(Values[3]));
          Info.Frequency := 0;
          InfoList[I] := Info;

          if Frequencies.TryGetValue(Info.Value, Frequency) then
            Inc(Frequency)
          else
            Frequency := 1;
          Frequencies.AddOrSetValue(Info.Value, Frequency);
        end;

        // The final counts are only known after the whole list was read,
        // so copy them into the records in a second pass.
        for I := 0 to InfoList.Count-1 do
        begin
          Info := InfoList[I];
          Info.Frequency := Frequencies[Info.Value];
          InfoList[I] := Info;
        end;
      finally
        Frequencies.Free;
      end;

      InfoList.Sort(
        TDelegatedComparer<CSVInfo>.Create(
          function(const Left, Right: CSVInfo): Integer
          begin
            Result := Right.Frequency - Left.Frequency;
            if Result = 0 then
              Result := Right.Value - Left.Value;
          end
        )
      );

      for I := 0 to InfoList.Count-1 do
      begin
        Info := InfoList[I];
        CSV[I] := Info.Line + ', ' + IntToStr(Info.Frequency);
      end;
    finally
      // Free the list created above (the original freed an undeclared "List").
      InfoList.Free;
    end;

    // use CSV as needed...
  finally
    CSV.Free;
  end;
end;
我有一个 csv 值的 TStringList;目前每个字符串都有四个值,v0、v1、v2、v3,其中任何一个都可以重复。我想为每个字符串添加第 5 个值 (v4),这将是 v3 的计数。然后该列表应按 v4、v3 降序排序。
例如,这个数据:
1, 2, 3, 4
1, 3, 4, 4
2, 2, 1, 2
1, 2, 3, 4
3, 1, 2, 2
3, 1, 2, 1
应该返回以下列表:
1, 2, 3, 4, 3
1, 3, 4, 4, 3
1, 2, 3, 4, 3
2, 2, 1, 2, 2
3, 1, 2, 2, 2
3, 1, 2, 1, 1
其中每行中的第 5 个元素是第 4 个元素的频率,并按该元素降序排列。
这不是作业问题,
我目前正在使用 COM 将列表保存到 Excel,因此得到一个包含 4 列的工作表。然后我在第 5 列中插入公式 =COUNTIF(D:D, D1),用来计算 v3 的出现次数(即 v4)。接着,我按第 5 列降序对工作表进行排序,再将工作表重新导入到字符串列表中。这行得通,但我知道并非所有用户都安装了 Excel,所以我希望有一个更简单的解决方案。
一个简单的方法可能如下所示:
uses
Classes,
StrUtils;
// Comparison callback for TStringList.CustomSort.
// Each line is expected to hold five comma-separated integer fields; the
// fifth (index 4) is the frequency and the fourth (index 3) is the value.
// Orders lines by frequency descending, then by value descending.
// Returns < 0 when line Index1 must come before line Index2, > 0 when it
// must come after, and 0 when both keys are equal.
// Raises EConvertError (from StrToInt) if a key field is not an integer.
// Requires SysUtils (StrToInt, Trim) and Types/StrUtils (TStringDynArray,
// SplitString) in the uses clause.
function SortValues(List: TStringList; Index1, Index2: Integer): Integer;
var
  Left, Right: TStringDynArray;
begin
  Left := SplitString(List[Index1], ',');
  Right := SplitString(List[Index2], ',');
  // The fields are strings (possibly padded with blanks after the comma),
  // so they must be trimmed and converted before they can be subtracted.
  Result := StrToInt(Trim(Right[4])) - StrToInt(Trim(Left[4]));
  if Result = 0 then
    Result := StrToInt(Trim(Right[3])) - StrToInt(Trim(Left[3]));
end;
// Appends to every CSV line the number of lines that share its fourth
// field, then sorts the list with SortValues.
// Requires SysUtils (Exception, IntToStr, Trim) in the uses clause.
var
  CSV: TStringList;
  I, J: Integer;
  Values: TStringDynArray;
  Keys: TStringDynArray;
  Frequencies: array of Integer;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    // Pass 1: validate every line and cache its fourth field, so that each
    // line is parsed only once and before any line has been modified.
    SetLength(Keys, CSV.Count);
    for I := 0 to CSV.Count-1 do
    begin
      Values := SplitString(CSV[I], ',');
      if Length(Values) <> 4 then
        raise Exception.Create('Bad Input!');
      Keys[I] := Trim(Values[3]);
    end;

    // Pass 2: count how many lines (including the line itself) share the key.
    SetLength(Frequencies, Length(Keys));
    for I := 0 to High(Keys) do
    begin
      Frequencies[I] := 0;
      for J := 0 to High(Keys) do
      begin
        if Keys[J] = Keys[I] then
          Inc(Frequencies[I]);
      end;
    end;

    // Pass 3: only now append the fifth field. Appending inside the counting
    // loop would make already-processed lines fail the 4-field check.
    for I := 0 to CSV.Count-1 do
      CSV[I] := CSV[I] + ', ' + IntToStr(Frequencies[I]);

    CSV.CustomSort(@SortValues);
    // use CSV as needed...
  finally
    CSV.Free;
  end;
end;
但是,这有很多开销,一遍又一遍地解析和重新解析 CSV 字符串。这可以通过减少解析 CSV 字符串的次数并缓存结果来更好地优化,例如:
uses
Classes,
StrUtils;
type
// Pointer to a CSVInfo record.
PCSVInfo = ^CSVInfo;
// Cached, pre-parsed data for one CSV line.
CSVInfo = record
// Text of the CSV line.
Line: string;
// Integer sort key of the line.
Value: Integer;
// Occurrence count associated with Value.
Frequency: Integer;
end;
// Comparison callback for TStringList.CustomSort.
// Reads the PCSVInfo stored in Objects[] of both entries and orders them by
// Frequency descending, falling back to Value descending on a tie.
function SortValues(List: TStringList; Index1, Index2: Integer): Integer;
var
  A, B: PCSVInfo;
begin
  A := PCSVInfo(List.Objects[Index1]);
  B := PCSVInfo(List.Objects[Index2]);
  if B^.Frequency <> A^.Frequency then
    Result := B^.Frequency - A^.Frequency
  else
    Result := B^.Value - A^.Value;
end;
// Parses every CSV line once into InfoArr, counts how often each fourth
// field occurs, appends that count to the line and sorts the list with
// SortValues, which reads the cached records through CSV.Objects[].
// Requires SysUtils (Exception, IntToStr, StrToInt, Trim) in the uses clause.
var
  CSV: TStringList;
  I, J: Integer;
  Values: TStringDynArray;
  InfoArr: array of CSVInfo;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    SetLength(InfoArr, CSV.Count);
    for I := 0 to CSV.Count-1 do
    begin
      Values := SplitString(CSV[I], ',');
      if Length(Values) <> 4 then
        raise Exception.Create('Bad Input!');
      InfoArr[I].Line := CSV[I];
      // Values[3] is a string (possibly with a leading blank); convert it
      // explicitly because Value is an Integer.
      InfoArr[I].Value := StrToInt(Trim(Values[3]));
      InfoArr[I].Frequency := 0;
    end;

    for I := 0 to CSV.Count-1 do
    begin
      // Start at 1 to count the line itself, then add every other match.
      InfoArr[I].Frequency := 1;
      for J := 0 to CSV.Count-1 do
      begin
        if (J <> I) and (InfoArr[J].Value = InfoArr[I].Value) then
          Inc(InfoArr[I].Frequency);
      end;
      CSV[I] := CSV[I] + ', ' + IntToStr(InfoArr[I].Frequency);
      // InfoArr is not resized after this point, so the element address
      // stays valid while the list is sorted and used.
      CSV.Objects[I] := TObject(@InfoArr[I]);
    end;

    CSV.CustomSort(@SortValues);
    // use CSV as needed...
  finally
    CSV.Free;
  end;
end;
或者,您可以在解析 CSV 字符串时计算频率并将它们存储在 TDictionary 中,然后使用 TList<T> 对详细信息进行排序,例如:
uses
System.Classes,
System.Generics.Defaults,
System.Generics.Collections,
System.StrUtils;
type
// Cached, pre-parsed data for one CSV line.
CSVInfo = record
// Text of the CSV line.
Line: string;
// Integer sort key of the line.
Value: Integer;
// Occurrence count associated with Value.
Frequency: Integer;
end;
// Parses every CSV line once, tallies how often each fourth field occurs in
// a dictionary, sorts the parsed records by frequency (descending) and then
// by value (descending), and writes the lines back with the frequency
// appended as a fifth field.
// Requires System.SysUtils (Exception, IntToStr, StrToInt, Trim) in the
// uses clause.
var
  CSV: TStringList;
  I, Frequency: Integer;
  Values: TStringDynArray;
  Info: CSVInfo;
  InfoList: TList<CSVInfo>;
  Frequencies: TDictionary<Integer, Integer>;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    InfoList := TList<CSVInfo>.Create;
    try
      InfoList.Count := CSV.Count;

      Frequencies := TDictionary<Integer, Integer>.Create;
      try
        for I := 0 to CSV.Count-1 do
        begin
          Values := SplitString(CSV[I], ',');
          if Length(Values) <> 4 then
            raise Exception.Create('Bad Input!');
          Info.Line := CSV[I];
          // Values[3] is a string (possibly with a leading blank); convert
          // it explicitly because Value is an Integer.
          Info.Value := StrToInt(Trim(Values[3]));
          Info.Frequency := 0;
          InfoList[I] := Info;

          if Frequencies.TryGetValue(Info.Value, Frequency) then
            Inc(Frequency)
          else
            Frequency := 1;
          Frequencies.AddOrSetValue(Info.Value, Frequency);
        end;

        // The final counts are only known after the whole list was read,
        // so copy them into the records in a second pass.
        for I := 0 to InfoList.Count-1 do
        begin
          Info := InfoList[I];
          Info.Frequency := Frequencies[Info.Value];
          InfoList[I] := Info;
        end;
      finally
        Frequencies.Free;
      end;

      InfoList.Sort(
        TDelegatedComparer<CSVInfo>.Create(
          function(const Left, Right: CSVInfo): Integer
          begin
            Result := Right.Frequency - Left.Frequency;
            if Result = 0 then
              Result := Right.Value - Left.Value;
          end
        )
      );

      for I := 0 to InfoList.Count-1 do
      begin
        Info := InfoList[I];
        CSV[I] := Info.Line + ', ' + IntToStr(Info.Frequency);
      end;
    finally
      // Free the list created above (the original freed an undeclared "List").
      InfoList.Free;
    end;

    // use CSV as needed...
  finally
    CSV.Free;
  end;
end;