curl_multi_exec 需要很长时间才能回复

curl_multi_exec is taking forever to give response

我的项目需要使用 curl multi 来执行请求。对于数组中存储的每个用户名,都要访问一个对应的 url,用户名数组的大小接近 45k。到目前为止,我已经创建了另一个数组,其中存放着我要访问的 45k 个 url;为了高效地发送请求,我把 url 数组分成了每块 200 个的块,然后把每个分块数组传给 multi_curl_execute 以获取响应。但问题是,接收全部 45k 个请求的响应花费的时间太长。我打印了响应数组,它确实如预期那样不断增长,但要打印出所有响应耗时太久。请帮帮我,因为我必须在明天之前实现我的目标。下面是我的代码:

// Placeholder for the real list of ~45,000 usernames (numeric IDs 1..45000 here).
$array1 = range(1, 45000);

现在用每个用户名作为查询字符串创建 url

// Build one request URL per username.
$url = array();
foreach ($array1 as $arr) {
    $url[] = 'abc.com?u=' . $arr;
}

// Split the URLs into batches of 200 (original keys preserved); each batch
// is handed to multiRequest() as one curl_multi run.
$chunk = array_chunk($url, 200, true);

// Send every batch and collect its responses. $res must exist before it is
// appended to: array_push() on an undefined variable is a TypeError on PHP 8.
$res = array();
foreach ($chunk as $c_arr) {
    $res[] = multiRequest($c_arr);
}

//我的multi_curl函数

/**
 * Fetches several URLs concurrently through one curl_multi stack.
 *
 * @param array $data    URLs to request; the array keys are reused as the keys of the result
 * @param array $options optional CURLOPT_* => value pairs applied to every handle
 *                       after the built-in defaults (so they may override them)
 * @return array one entry per key of $data, holding whatever
 *               curl_multi_getcontent() returns for that transfer
 */
function multiRequest($data,$options = array())
{
    $curly = array();
    $result = array();
    $mh = curl_multi_init();
    $ua = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13';

    foreach ($data as $id => $d) {
        $curly[$id] = curl_init();
        curl_setopt_array($curly[$id], array(
            CURLOPT_URL            => $d,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_USERAGENT      => $ua,
            CURLOPT_AUTOREFERER    => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS      => 20,
            CURLOPT_HTTPGET        => true,
            CURLOPT_HEADER         => 0,
        ));
        // Caller-supplied options were previously accepted but never applied.
        if (!empty($options)) {
            curl_setopt_array($curly[$id], $options);
        }
        curl_multi_add_handle($mh, $curly[$id]);
    }

    $running = null;
    do {
        // Older libcurl versions ask to be called again immediately.
        do {
            $status = curl_multi_exec($mh, $running);
        } while ($status === CURLM_CALL_MULTI_PERFORM);

        // Stop on a multi-stack error instead of spinning forever.
        if ($status !== CURLM_OK) {
            break;
        }

        // Sleep until a socket has activity (or 1 s passes) instead of
        // calling curl_multi_exec() in a tight loop that pins a CPU core.
        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            // select() could not wait on anything; back off briefly.
            usleep(1000);
        }
    } while ($running > 0);

    foreach ($curly as $id => $c) {
        $result[$id] = curl_multi_getcontent($c);
        curl_multi_remove_handle($mh, $c);
    }
    curl_multi_close($mh);

    return $result;
}

请告诉我应该怎么做,因为取回全部 45000 个请求的响应花了将近 25-30 分钟。目前我是在本地机器上运行这个脚本,之后它将被安排为线上服务器上的 cron 作业。

您是否尝试过多处理而不是 curl_multi?也许那更快?不会是第一次。

尝试

<?php

// Source of the worker script: it requests the URL for the username given as
// its first CLI argument and lets curl_exec() print the response to stdout.
$workerSource = <<<'CODE'
<?php
$ch=curl_init();
curl_setopt_array($ch,array(
CURLOPT_URL=>'abc.com?u='.urlencode($argv[1]),
CURLOPT_ENCODING=>"",
CURLOPT_USERAGENT=>'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13',
CURLOPT_AUTOREFERER=>true,
CURLOPT_FOLLOWLOCATION=>true,
CURLOPT_MAXREDIRS=>20
));
curl_exec($ch);
curl_close($ch);

CODE;

// Write the worker to a temporary file; the handle is kept in a variable so
// the file stays around while the jobs run.
$workerHandle = tmpfile ();
$workerMeta = stream_get_meta_data ( $workerHandle );
$workerPath = $workerMeta ['uri'];
file_put_contents ( $workerPath, $workerSource );

// One shell command per username (1..45000), all invoking the same worker.
$quotedWorker = escapeshellarg ( $workerPath );
$commands = array ();
foreach ( range ( 1, 45000 ) as $userId ) {
    $commands [] = '/usr/bin/php ' . $quotedWorker . ' ' . escapeshellarg ( ( string ) $userId );
}

// Run the commands 200 at a time and report the results and elapsed time.
$startedAt = microtime ( true );
$results = hhb_exec_multi1 ( $commands, 200 );
$elapsedSeconds = microtime ( true ) - $startedAt;
var_dump ( $results, $elapsedSeconds );
exit ();
/**
 * Plain result record for one command executed by hhb_exec_multi1().
 */
class hhb_exec_multi1_ret {
    /** The command string that was executed. */
    public $cmd;
    /** Exit code reported for the process. */
    public $ret;
    /** Contents captured from the process's stdout file. */
    public $stdout;
    /** Contents captured from the process's stderr file. */
    public $stderr;

    /**
     * Copies every entry of $attributes onto the property of the same name.
     *
     * @param array $attributes property name => value
     */
    function __construct(array $attributes) {
        foreach ( array_keys ( $attributes ) as $property ) {
            $this->{$property} = $attributes [$property];
        }
    }
}
/**
 * Runs shell commands as child processes, at most $max_concurrent at a time,
 * and collects the exit code, stdout and stderr of each one.
 *
 * Every child gets its stdout and stderr redirected to its own temporary
 * file, and a stdin pipe that is closed right after start-up so the child
 * does not inherit the parent's stdin. Running children are polled every
 * 100 ms.
 *
 * @param string[] $cmds command lines to execute; array keys are reused as result keys
 * @param int $max_concurrent maximum number of children running at once (must be >= 1)
 * @param callable|null $finished_callback optional; called with the
 *        hhb_exec_multi1_ret of each command as soon as it is seen to have finished
 * @throws InvalidArgumentException if $max_concurrent < 1 or $cmds contains a non-string
 * @throws RuntimeException if a temporary file or a child process cannot be created
 * @return hhb_exec_multi1_ret[] results keyed like $cmds, inserted in the
 *         order in which the commands were seen to finish
 */
function hhb_exec_multi1(array $cmds, int $max_concurrent = 10, $finished_callback = NULL): array {
    // input validation
    if ($max_concurrent < 1) {
        throw new InvalidArgumentException ( '$max_concurrent must be above 0... and less or equal to ' . PHP_INT_MAX );
    }
    foreach ( $cmds as $tmp ) {
        if (! is_string ( $tmp )) {
            throw new InvalidArgumentException ( '$cmds must be an array of strings!' );
        }
    }
    $ret = array ();
    $running = array ();
    foreach ( $cmds as $key => $cmd ) {
        $stdout = tmpfile ();
        $stderr = tmpfile ();
        if ($stdout === false || $stderr === false) {
            throw new RuntimeException ( 'tmpfile() failed for command: ' . $cmd );
        }
        $current = array (
                'cmd' => $cmd,
                'ret' => - 1,
                'stdout' => $stdout,
                'stderr' => $stderr,
                'key' => $key
        );
        $descriptorspec = array (
                // stdin is a pipe that we close right after start-up
                0 => array (
                        "pipe",
                        "rb"
                ),
                // stdout is a file to write to
                1 => array (
                        "file",
                        stream_get_meta_data ( $stdout ) ['uri'],
                        "wb"
                ),
                // stderr is a file to write to
                2 => array (
                        "file",
                        stream_get_meta_data ( $stderr ) ['uri'],
                        "wb"
                )
        );
        // Wait for a free slot before starting the next child.
        while ( count ( $running ) >= $max_concurrent ) {
            usleep ( 100 * 1000 );
            hhb_exec_multi1_reap ( $running, $ret, $finished_callback );
        }
        $pipes = array ();
        $proc = proc_open ( $cmd, $descriptorspec, $pipes );
        if ($proc === false) {
            fclose ( $stdout );
            fclose ( $stderr );
            throw new RuntimeException ( 'proc_open() failed for command: ' . $cmd );
        }
        // do it like this because we don't want the children to inherit our stdin,
        // which is the default behaviour if [0] is not defined.
        fclose ( $pipes [0] );
        $current ['proc'] = $proc;
        $running [] = $current;
    }
    // Everything has been started; drain the children that are still running.
    while ( count ( $running ) > 0 ) {
        usleep ( 100 * 1000 );
        hhb_exec_multi1_reap ( $running, $ret, $finished_callback );
    }
    return $ret;
}

/**
 * Helper for hhb_exec_multi1(): moves every finished child from $running
 * into $ret.
 *
 * For each finished entry it records the exit code, replaces the stdout and
 * stderr file handles with the files' contents, stores the resulting
 * hhb_exec_multi1_ret under the command's original key, and then invokes
 * $finished_callback (if non-empty) with that result.
 *
 * @param array $running entries of still-running children; finished ones are removed
 * @param array $ret results collected so far; finished ones are added
 * @param callable|null $finished_callback optional per-result callback
 */
function hhb_exec_multi1_reap(array &$running, array &$ret, $finished_callback) {
    foreach ( $running as $runningkey => $check ) {
        // Read the status before proc_close(): the exit code is taken from it.
        $stat = proc_get_status ( $check ['proc'] );
        if ($stat ['running']) {
            continue;
        }
        proc_close ( $check ['proc'] );
        $check ['ret'] = $stat ['exitcode'];
        // Read each output file before closing its handle.
        foreach ( array ( 'stdout', 'stderr' ) as $stream ) {
            $contents = file_get_contents ( stream_get_meta_data ( $check [$stream] ) ['uri'] );
            fclose ( $check [$stream] );
            $check [$stream] = $contents;
        }
        $checkkey = $check ['key'];
        unset ( $check ['key'] );
        unset ( $check ['proc'] );
        $result = ($ret [$checkkey] = new hhb_exec_multi1_ret ( $check ));
        unset ( $running [$runningkey] );
        if (! empty ( $finished_callback )) {
            $finished_callback ( $result );
        }
    }
}

当我在笔记本电脑上针对本地 nginx 服务器运行这段代码时,在循环设置为 45000 的情况下,它用了 6 分 39 秒(399 秒)执行完毕。

编辑:wups,忘记将代码写入作业文件 (file_put_contents),已修复。