按频率排序 Delphi TStringList

Sort Delphi TStringList by frequency

我有一个 csv 值的 TStringList;目前每个字符串都有四个值,v0、v1、v2、v3,其中任何一个都可以重复。我想为每个字符串添加第 5 个值 (v4),这将是 v3 的计数。然后该列表应按 v4、v3 降序排序。

例如,这个数据:
1, 2, 3, 4
1, 3, 4, 4
2, 2, 1, 2
1, 2, 3, 4
3, 1, 2, 2
3, 1, 2, 1

应该返回以下列表:
1, 2, 3, 4, 3
1, 3, 4, 4, 3
1, 2, 3, 4, 3
2, 2, 1, 2, 2
3, 1, 2, 2, 2
3, 1, 2, 1, 1
其中每行中的第 5 个元素是第 4 个元素的频率,并按该元素降序排列。

这不是作业问题。

我目前正在使用 COM 将列表保存到 Excel,因此有一个包含 4 列的工作表。然后我在第 5 列中插入公式 =COUNTIF(D:D, D1),计算 v3 的出现次数(即 v4)。接着,我按第 5 列降序对工作表进行排序,再将工作表重新导入到字符串列表中。这行得通,但我知道并非我的所有用户都会有 Excel,所以我希望有一个更简单的解决方案。

一个简单的方法可能如下所示:

uses
  Classes,
  SysUtils,
  StrUtils,
  Types;

// Comparison callback for TStringList.CustomSort.
//
// Each line is split on ',' and is expected to contain at least five integer
// fields; field index 4 is the frequency and field index 3 is the value the
// frequency was computed for. Lines are ordered by frequency descending, and
// lines with the same frequency are ordered by value descending.
//
// Returns < 0 when the line at Index1 sorts first, > 0 when the line at
// Index2 sorts first, and 0 when both keys are equal.
// StrToInt raises EConvertError if a key field is not a valid integer.
function SortValues(List: TStringList; Index1, Index2: Integer): Integer;
var
  Left, Right: TStringDynArray;
begin
  Left := SplitString(List[Index1], ',');
  Right := SplitString(List[Index2], ',');
  // SplitString yields strings (with the blank that follows each comma still
  // attached), so the fields must be trimmed and converted before they can be
  // compared numerically.
  Result := StrToInt(Trim(Right[4])) - StrToInt(Trim(Left[4]));
  if Result = 0 then
    Result := StrToInt(Trim(Right[3])) - StrToInt(Trim(Left[3]));
end;

var
  CSV: TStringList;
  I, J: Integer;
  Values, Values2: TStringDynArray;
  Frequencies: array of Integer;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    // Frequencies[I] is the number of lines whose 4th field equals the 4th
    // field of line I (line I itself included).
    SetLength(Frequencies, CSV.Count);

    for I := 0 to CSV.Count-1 do
    begin
      Values := SplitString(CSV[I], ',');
      if Length(Values) <> 4 then
        raise Exception.Create('Bad Input!');

      Frequencies[I] := 1;
      for J := 0 to CSV.Count-1 do
      begin
        if J <> I then
        begin
          Values2 := SplitString(CSV[J], ',');
          if Length(Values2) <> 4 then
            raise Exception.Create('Bad Input!');
          // Trim so that '4' and ' 4' count as the same value.
          if Trim(Values2[3]) = Trim(Values[3]) then
            Inc(Frequencies[I]);
        end;
      end;
    end;

    // Append the 5th field only after all counting is finished. Modifying
    // CSV[I] inside the loop above would make already-processed lines fail
    // the "exactly 4 fields" check when they are re-parsed for later lines.
    for I := 0 to CSV.Count-1 do
      CSV[I] := CSV[I] + ', ' + IntToStr(Frequencies[I]);

    // Order by frequency (field 5) descending, then value (field 4) descending.
    CSV.CustomSort(@SortValues);

    // use CSV as needed...

  finally
    CSV.Free;
  end;
end;

但是,这有很多开销,一遍又一遍地解析和重新解析 CSV 字符串。这可以通过减少解析 CSV 字符串的次数并缓存结果来更好地优化,例如:

uses
  Classes,
  StrUtils;

type
  // Pointer to a CSVInfo record.
  PCSVInfo = ^CSVInfo;
  // Per-line data kept alongside a CSV string.
  CSVInfo = record
    Line: string;        // text of the CSV line
    Value: Integer;      // value taken from the line's 4th field
    Frequency: Integer;  // count associated with Value
  end;

// Comparison callback for TStringList.CustomSort.
// Reads the PCSVInfo stored in List.Objects for both indexes and orders the
// entries by Frequency descending, falling back to Value descending when the
// frequencies are equal.
function SortValues(List: TStringList; Index1, Index2: Integer): Integer;
var
  First, Second: PCSVInfo;
begin
  First := PCSVInfo(List.Objects[Index1]);
  Second := PCSVInfo(List.Objects[Index2]);
  if Second^.Frequency <> First^.Frequency then
    Result := Second^.Frequency - First^.Frequency
  else
    Result := Second^.Value - First^.Value;
end;

// NOTE: Exception, IntToStr, StrToInt and Trim come from SysUtils, which must
// be listed in the uses clause.
var
  CSV: TStringList;
  I, J: Integer;
  Values: TStringDynArray;
  InfoArr: array of CSVInfo;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    SetLength(InfoArr, CSV.Count);

    // Parse every line exactly once and cache its 4th field as an integer.
    for I := 0 to CSV.Count-1 do
    begin
      Values := SplitString(CSV[I], ',');
      if Length(Values) <> 4 then
        raise Exception.Create('Bad Input!');
      InfoArr[I].Line := CSV[I];
      // The field is text (with the blank after the comma still attached), so
      // it has to be trimmed and converted; StrToInt raises EConvertError if
      // it is not a valid integer.
      InfoArr[I].Value := StrToInt(Trim(Values[3]));
      InfoArr[I].Frequency := 0;
    end;

    // Count how many lines share each line's value, then append the count as
    // the 5th field and attach the cached record to the string.
    for I := 0 to CSV.Count-1 do
    begin
      InfoArr[I].Frequency := 1;
      for J := 0 to CSV.Count-1 do
      begin
        if (J <> I) and (InfoArr[J].Value = InfoArr[I].Value) then
          Inc(InfoArr[I].Frequency);
      end;
      CSV[I] := CSV[I] + ', ' + IntToStr(InfoArr[I].Frequency);
      // Objects[] holds a raw pointer into InfoArr, so InfoArr must not be
      // resized or released while the list still uses these pointers.
      CSV.Objects[I] := TObject(@InfoArr[I]);
    end;

    CSV.CustomSort(@SortValues);

    // use CSV as needed...

  finally
    CSV.Free;
  end;
end;

或者,您可以在解析 CSV 字符串时计算频率并将它们存储在 TDictionary 中,然后使用 TList<T> 对详细信息进行排序,例如:

uses
  System.Classes,
  System.Generics.Defaults,
  System.Generics.Collections,
  System.StrUtils;

type
  // Per-line data kept while computing frequencies and sorting.
  CSVInfo = record
    Line: string;        // text of the CSV line
    Value: Integer;      // value taken from the line's 4th field
    Frequency: Integer;  // count associated with Value
  end;

// NOTE: Exception, IntToStr, StrToInt and Trim come from System.SysUtils,
// which must be listed in the uses clause.
var
  CSV: TStringList;
  I, Frequency: Integer;
  Values: TStringDynArray;
  Info: CSVInfo;
  InfoList: TList<CSVInfo>;
  Frequencies: TDictionary<Integer, Integer>;
begin
  CSV := TStringList.Create;
  try
    // populate CSV as needed...

    InfoList := TList<CSVInfo>.Create;
    try
      // Pre-size the list so entries can be assigned by index below.
      InfoList.Count := CSV.Count;

      // Maps a 4th-field value to the number of lines that contain it.
      Frequencies := TDictionary<Integer, Integer>.Create;
      try
        // Pass 1: parse each line once, record it, and tally its value.
        for I := 0 to CSV.Count-1 do
        begin
          Values := SplitString(CSV[I], ',');
          if Length(Values) <> 4 then
            raise Exception.Create('Bad Input!');

          Info.Line := CSV[I];
          // The field is text (with the blank after the comma still
          // attached), so it has to be trimmed and converted; StrToInt raises
          // EConvertError if it is not a valid integer.
          Info.Value := StrToInt(Trim(Values[3]));
          Info.Frequency := 0;
          InfoList[I] := Info;

          if Frequencies.TryGetValue(Info.Value, Frequency) then
            Inc(Frequency)
          else
            Frequency := 1;
          Frequencies.AddOrSetValue(Info.Value, Frequency);
        end;

        // Pass 2: the totals are only final once every line has been seen,
        // so copy them into the records afterwards. CSVInfo is a record, so
        // each entry is read, updated and written back.
        for I := 0 to InfoList.Count-1 do
        begin
          Info := InfoList[I];
          Info.Frequency := Frequencies[Info.Value];
          InfoList[I] := Info;
        end;
      finally
        Frequencies.Free;
      end;

      // Frequency descending, then value descending.
      InfoList.Sort(
        TDelegatedComparer<CSVInfo>.Create(
          function(const Left, Right: CSVInfo): Integer
          begin
            Result := Right.Frequency - Left.Frequency;
            if Result = 0 then
              Result := Right.Value - Left.Value;
          end
        )
      );

      // Write the sorted lines back, appending the frequency as the 5th field.
      for I := 0 to InfoList.Count-1 do
      begin
        Info := InfoList[I];
        CSV[I] := Info.Line + ', ' + IntToStr(Info.Frequency);
      end;
    finally
      InfoList.Free;
    end;

    // use CSV as needed...

  finally
    CSV.Free;
  end;
end;