如何使用多线程(multithreaded)idhttp 调用来处理一个 StringList

How to make multithreaded idhttp calls to do work on a StringList

我是线程方面的新手,我有一个包含字符串的列表。我的目标是让多个线程一起处理这个列表。因为我还在学习,目前这段代码只使用了一个线程,但是当我按下开始按钮时出现了 AV(访问冲突)。

type
  // Worker thread from the question; its Execute iterates over the List field.
  TDemoThread = class(TThread)
  private
    procedure Abort;
  protected
    // Thread body.
    procedure Execute; override;
  public
    // Strings that Execute iterates over.
    // NOTE(review): nothing in the visible code ever assigns this field.
    List: TStringList;
  end;

// Start-button handler from the question: walks memo1's lines, then creates
// and starts a single TDemoThread.
procedure TfrmMain.StartButton1Click(Sender: TObject);
var
  i: integer;
  List: Tstrings;
begin
  for i := 0 to memo1.Lines.Count - 1 do
  begin
    // NOTE(review): a fresh TStringList is created on every iteration; the
    // previous one is neither freed nor passed anywhere.
    List := TStringList.Create;
    List.Add(memo1.Lines.Strings[i]);
  end;

  // Created suspended (True) so FreeOnTerminate can be set before Start.
  Thread := TDemoThread.Create(True);
  Thread.FreeOnTerminate := True;
  // NOTE(review): Thread.List is never assigned before the thread starts.
  Thread.Start;
end;

// Thread body from the question: for each entry in List, downloads
// 'https://instagram.com/<entry>', extracts a value from the response and
// appends "<entry> : <value>" to frmMain.Memo2 via Synchronize.
procedure TDemoThread.Execute;
var
  lHTTP: TIdHTTP;
  i: integer;
  X: Tstrings;
begin
  inherited;
  if Terminated then
    Exit;

  // NOTE(review): lHTTP and X are created here but never freed in this procedure.
  lHTTP := TIdHTTP.Create(nil);
  X := TStringList.Create;
  lHTTP.ReadTimeout := 30000;
  lHTTP.HandleRedirects := True;

  // NOTE(review): List is the public field of the thread class; it is not
  // assigned anywhere in the visible code.
  for i := 0 to List.Count - 1 do
    try
      X.Text := lHTTP.Get('https://instagram.com/' + List.Strings[i]);
      // NOTE(review): S is not declared in this procedure's var section.
      S := ExtractDelimitedString(X.Text);
      X.Clear;
      // The memo is updated through Synchronize rather than directly from this thread.
      TThread.Synchronize(nil,
        procedure
        begin
          frmMain.Memo2.Lines.Add(List.Strings[i] + ' : ' + S);
        end);
    finally
      // Empty finally section: no cleanup is performed here.
    end;
end;

您的问题在于:您从未给线程类的 List 成员赋值:

type
  // The question's thread class, quoted to point out the faulty field.
  TDemoThread = class(TThread)
  private
    procedure Abort;
  protected
    procedure Execute; override;
  public
    List: TStringList; // <-- never assigned to, hence always nil
  end;

因此访问冲突。

您似乎正试图将 memo1 的内容传递给线程。我会这样做:

type
  // Worker thread that owns a private copy of the strings it has to process.
  TDemoThread = class(TThread)
  private
    // Copy of the caller's strings; created in Create, released in Destroy.
    FData: TStringList;
  protected
    procedure Execute; override;
  public
    // Data is copied into FData, so the caller keeps ownership of its list.
    // The declaration must carry the same parameter list as the implementation.
    constructor Create(Data: TStrings);
    destructor Destroy; override;
  end;

// Builds a self-freeing worker thread that works on its own copy of Data.
constructor TDemoThread.Create(Data: TStrings);
begin
  // False = not created suspended
  inherited Create(False);
  // The thread object releases itself once Execute has finished
  FreeOnTerminate := True;
  // Take a private copy of the caller's strings
  FData := TStringList.Create;
  FData.Assign(Data);
end;

// Releases the private string copy, then lets TThread finish its own teardown.
destructor TDemoThread.Destroy;
begin
  FData.Free;
  inherited Destroy;
end;

// Thread body, adapted from the question to read from FData: for each entry,
// downloads 'https://instagram.com/<entry>', extracts a value from the
// response and appends "<entry> : <value>" to frmMain.Memo2 via Synchronize.
procedure TDemoThread.Execute;
var
  lHTTP: TIdHTTP;
  i: integer;
  X: TStrings;
begin
  inherited;
  if Terminated then
    Exit;

  // NOTE(review): lHTTP and X are created here but never freed in this procedure.
  lHTTP := TIdHTTP.Create(nil);
  X := TStringList.Create;
  lHTTP.ReadTimeout := 30000;
  lHTTP.HandleRedirects := True;

  for i := 0 to FData.Count - 1 do
    try
      X.Text := lHTTP.Get('https://instagram.com/' + FData[i]);
      // NOTE(review): S is not declared in this procedure's var section.
      S := ExtractDelimitedString(X.Text);
      X.Clear;
      // The memo is updated through Synchronize rather than directly from this thread.
      TThread.Synchronize(nil,
        procedure
        begin
          frmMain.Memo2.Lines.Add(FData[i] + ' : ' + S);
        end);
    finally
      // Empty finally section: no cleanup is performed here.
    end;
end;

// Start-button handler: launches one worker thread over memo1's lines.
procedure TfrmMain.StartButton1Click(Sender: TObject);
var
  Source: TStrings;
begin
  Source := memo1.Lines;
  // No reference to the new thread object is stored
  TDemoThread.Create(Source);
end;

先以挂起状态创建线程、然后立即启动它是没有意义的。另外,FreeOnTerminate 线程一旦启动,就不应再保留对它的引用,所以我把这个引用删掉了。

TDemoThread.Execute 中的代码会泄漏对象,除非您只在 ARC 平台上运行。那个 try/finally 毫无意义。而且您不需要用字符串列表来保存单个字符串。假设您没有使用 ARC,代码应该是:

// Thread body: for every entry in FData, downloads
// 'https://instagram.com/<entry>', extracts a value from the response and
// appends "<entry> : <value>" to frmMain.Memo2 on the main thread.
// An exception raised by a request leaves the loop; lHTTP is still freed.
procedure TDemoThread.Execute;
var
  lHTTP: TIdHTTP;
  i: integer;
  Entry, S: string;
begin
  if Terminated then
    Exit;

  lHTTP := TIdHTTP.Create(nil);
  try
    lHTTP.ReadTimeout := 30000;
    lHTTP.HandleRedirects := True;

    for i := 0 to FData.Count - 1 do
    begin
      // Re-check on every pass so a Terminate request can stop the thread
      // between two downloads instead of only before the first one.
      if Terminated then
        Break;

      // Copy the entry into a local so the synchronized closure reads neither
      // the loop variable nor FData from the main thread.
      Entry := FData[i];
      S := ExtractDelimitedString(lHTTP.Get('https://instagram.com/' + Entry));
      TThread.Synchronize(nil,
        procedure
        begin
          frmMain.Memo2.Lines.Add(Entry + ' : ' + S);
        end);
    end;
  finally
    lHTTP.Free;
  end;
end;

就我个人而言,我会避免从线程本身更新表单。线程在这里是数据生成器,而不是 GUI 管理器,所以应当把两者的关注点分开。

我会让所有线程将结果累积到同一个共享容器中,然后让 GUI 线程去轮询该容器。人眼很慢,Windows GUI 也很慢,因此您更新 GUI 的频率不应超过每秒 2 或 3 次。更频繁的更新只会浪费 CPU,并使表单内容难以看清。

另一件事是避免使用较慢的 TStringList,除非确实需要它的额外功能(正是这些功能使它变慢)。常规的 TList<string> 作为简单容器已绰绰有余,而且速度更快。

type 
  TDemoThread = class;

  // Main form: owns the shared result container and the list of running workers.
  TfrmMain = class(TForm)
  private
    // Workers that have not finished yet; each worker removes itself when done.
    Fetchers: TThreadList<TDemoThread>;
    // Results produced by the workers, waiting to be moved into Memo2.
    Data:     TThreadList<string>;
    // Backing field of the inProcess property.
    FInProcess: Boolean;

    // Switches the form between idle and working mode.
    procedure SetInProcess(const Value: Boolean);
    // Moves the accumulated results from Data into Memo2.
    procedure FlushData;

    property inProcess: Boolean read FInProcess write SetInProcess;
  public
    procedure AfterConstruction; override;
    procedure BeforeDestruction; override;
  // ... (remaining form members are elided in this sketch)
  end;

  // this demo makes each thread per each line - that is actually a bad design
  // one better use a thread pool working over the same queue and only have
  // 20-40 worker threads for all the URLs
  TDemoThread = class(TThread)
  private
    // Suffix appended to the base address that Execute downloads.
    URL: string;  
    // Shared container that receives this worker's result.
    List: TThreadList<string>;
    // List of running workers this thread removes itself from when it ends.
    Tracker: TThreadList<TDemoThread>;
  protected
    procedure Execute; override;
  end;

// Waits until every worker has removed itself from Fetchers, then releases
// both shared lists before the inherited teardown runs.
procedure TfrmMain.BeforeDestruction;
var
  Pending: Integer;
begin
  repeat
    // TThreadList<T> has no Count of its own; the inner list is only
    // reachable between LockList and UnlockList.
    Pending := Fetchers.LockList.Count;
    Fetchers.UnlockList;

    // Poll again a little later if workers are still running
    if Pending > 0 then
      Sleep(100);
  until Pending <= 0;

  FreeAndNil( Fetchers );
  Data.Free;

  inherited;
end;

// Creates the two shared thread-safe lists before the inherited
// AfterConstruction runs.
procedure TfrmMain.AfterConstruction;
begin
  Fetchers := TThreadList<TDemoThread>.Create;
  Data :=     TThreadList<string>.Create; 
  // TThreadList<T> starts with Duplicates = dupIgnore, which would silently
  // drop a result equal to one already queued; every result must be kept.
  Data.Duplicates := dupAccept;
  inherited;
end;

// Start-button handler: creates one suspended worker per line of memo1,
// switches the form into working mode, then starts all workers.
procedure TfrmMain.StartButton1Click(Sender: TObject);
var
  worker: TDemoThread;
  URL: string;
  Queued: TList<TDemoThread>;
begin
  If inProcess then exit;

  for URL in memo1.Lines do begin
    // Created suspended so all fields are set before the thread runs
    worker := TDemoThread.Create(True);  
    worker.FreeOnTerminate := True;
    worker.URL := URL;
    worker.List := Data;
    worker.Tracker := Fetchers;
    Fetchers.Add( worker );
  end;

  InProcess := True;

  // TThreadList<T> cannot be enumerated directly; iterate the inner list
  // while holding the lock. A worker that finishes early waits in
  // Tracker.Remove until the lock is released, so the list is stable here.
  Queued := Fetchers.LockList;
  try
    for worker in Queued do
      worker.Start;
  finally
    Fetchers.UnlockList;
  end;
end;

// Property setter: switches the form between idle (False) and working (True)
// mode, adjusting the controls and the polling timer accordingly.
procedure TfrmMain.SetInProcess(const Value: Boolean);
begin
  if Value = InProcess then exit; // form already is in this mode

  FInProcess := Value;

  memo1.ReadOnly := Value;
  // NOTE(review): the click handler is named StartButton1Click - confirm the
  // component really is called StartButton and not StartButton1.
  StartButton.Enabled := not Value;
  if Value then begin
     // Starting a new run: drop the output and leftovers of the previous one
     Memo2.Lines.Clear;
     Data.Clear;
  end;

  // TTimer's period property is Interval; it has no Delay property
  Timer1.Interval := 500; // twice per second
  Timer1.Enabled := Value;

  If not Value then begin
     // for future optimisation - make immediate mode change
     // when last worker thread quits, no waiting for timer event
     FlushData;
     ShowMessage('Work complete');
  end;
end;

// Timer handler: moves the results collected so far into Memo2 and leaves
// working mode once no worker is registered in Fetchers any more.
// The parameter is declared without const so the method matches TNotifyEvent.
procedure TfrmMain.Timer1Timer(Sender: TObject);
var
  Remaining: Integer;
begin
  FlushData;

  // TThreadList<T> has no Count of its own; read it from the locked inner list
  Remaining := Fetchers.LockList.Count;
  Fetchers.UnlockList;

  if Remaining <= 0 then
     InProcess := False;
end;

// Moves every result currently held in Data into Memo2 and empties Data.
procedure TfrmMain.FlushData;
var
  Results: TList<string>;
begin
  // Copy and clear must happen under one lock so no worker can add a result
  // in between and have it cleared without being shown.
  Results := Data.LockList;
  try
    // ToArray and Clear belong to the inner TList<string> returned by LockList
    Memo2.Lines.AddStrings( Results.ToArray );
    Results.Clear;
  finally
    Data.UnLockList;
  end;
end;

// Thread body: downloads 'https://instagram.com/<URL>', extracts a value from
// the response and adds it to the shared List. The worker always removes
// itself from Tracker, even when the download raises an exception.
procedure TDemoThread.Execute;
var
  lHTTP: TIdHTTP;
  S: string;
begin
  try 
    lHTTP := TIdHTTP.Create(nil);
    try
      lHTTP.ReadTimeout := 30000;
      lHTTP.HandleRedirects := True;

      S := ExtractDelimitedString( lHTTP.Get('https://instagram.com/' + URL) );

      List.Add( S );
    finally
      lHTTP.Free;
    end;
  finally
    // Deregister so the form can tell when all workers have finished
    Tracker.Remove( Self );
  end;
end;

PS:就个人而言,我还会使用 OmniThreadLibrary(OTL),因为它通常可以更轻松地维护数据生成线程。例如,管理创建多少个线程只需设置一个属性,而确定所有线程何时完成工作也只需要一行代码。您真的不应该为了获取所有 URL 而创建上千个线程,而应该在线程池(Thread Pool)中保留 10-20 个线程,让它们从输入队列(Input Queue)中取出 URL 并逐个获取。我建议您在 http://otl.17slon.com/tutorials.htm 阅读有关 OTL 的 Parallel For 和 Fork-Join 模式的内容——它们可以使此类应用程序更简洁、更易于编写。Pipeline 模式可能更适合此任务,因为您本来就要准备一个 URL 列表作为源集合。这样 StartButtonClick 中一半的脚手架代码都会消失,整个 TDemoThread 类也不再需要。