Selenium 爬虫超时问题 C#

Selenium Crawler Timeout Issues (C#)

下面是我正在运行的代码,在执行过程中我收到了一个错误。

  /// <summary>
  /// Runs the category-scraping script from <c>GetCategory.js</c> in the browser, waits until
  /// the script reports completion through the page-level <c>finished</c> flag, and then calls
  /// <c>AddToCatsTreeSelenium()</c>.
  /// </summary>
  /// <remarks>
  /// The method is best-effort: failures are written to the console log instead of being thrown,
  /// and <c>AddToCatsTreeSelenium()</c> is always called at the end. Polling stops as soon as the
  /// flag reads <c>True</c> or after two failed polls, whichever comes first.
  /// NOTE(review): the 60-second error reported for this method is the WebDriver HTTP command
  /// timeout. It is presumably configured where the driver is constructed, not by the
  /// script/page-load/implicit timeouts set below - confirm against the driver setup code.
  /// </remarks>
  public void GetCategoriesSelenium() {
            // Number of failed polls tolerated before giving up on the flag.
            const int maxPollFailures = 2;
            // Pause between polls so the driver is not flooded with requests.
            const int pollIntervalMs = 1000;

            string javascript = System.IO.File.ReadAllText(@"GetCategory.js");

            // Each ExecuteScript body runs inside its own anonymous function, so a bare
            // "var finished;" would only declare a function-local variable. Set the shared
            // state explicitly on the window object, in a single round trip.
            CrawlerWebSeleniumJS.ExecuteScript("window.finished = false; window.all_categories = [];");

            CrawlerWebSelenium.Manage().Timeouts().SetScriptTimeout(TimeSpan.FromDays(1));
            CrawlerWebSelenium.Manage().Timeouts().SetPageLoadTimeout(TimeSpan.FromDays(1));
            CrawlerWebSelenium.Manage().Timeouts().ImplicitlyWait(TimeSpan.FromDays(1));

            AddToConsole("CRAWLER: GET - Categories");

            try {
                CrawlerWebSeleniumJS.ExecuteScript(javascript);
                }
            catch (Exception ex) {
                // The call can fail (for example by timing out) while the script is still
                // running in the browser, so log the problem and fall through to polling.
                AddToConsole("CRAWLER: GET - Categories script call failed: " + ex.Message);
                }

            int pollFailures = 0;
            bool scriptFinished = false;

            while (!scriptFinished && pollFailures < maxPollFailures) {
                try {
                    object flag = CrawlerWebSeleniumJS.ExecuteScript("return finished");
                    if (flag == null) {
                        // The flag is not defined as a boolean; count it as a failed poll.
                        pollFailures++;
                        AddToConsole("CRAWLER: GET - Categories poll returned no value");
                        }
                    else if (flag.ToString() == "True") {
                        scriptFinished = true;
                        }
                    }
                catch (Exception ex) {
                    pollFailures++;
                    AddToConsole("CRAWLER: GET - Categories poll failed: " + ex.Message);
                    }

                if (!scriptFinished && pollFailures < maxPollFailures) {
                    System.Threading.Thread.Sleep(pollIntervalMs);
                    }
                }

            AddToCatsTreeSelenium();
            }

// Scans every top-level category link on the page and builds a three-level category tree
// (category -> sub -> subsub -> subsubsub) by querying the JSON endpoint once per node.
// Results are appended to the page-level `all_categories` array, and the page-level
// `finished` flag is set to true when the scan ends.
//
// The requests are deliberately synchronous (async: false) and issued one at a time, so the
// whole scan blocks the browser until it is done. Whoever injects this script must allow a
// command timeout longer than the total run time.
(function () {
    var ENDPOINT = 'URL REMOVED';
    // Deepest level that is requested; items found at this level are leaves.
    var MAX_DEPTH = 3;
    // Level whose OFFER / WANTED items are never expanded further.
    var FILTERED_DEPTH = 2;

    // Requests the child items of the node identified by `levelIds` ([level1id, level2id, ...]).
    // Returns the response's `items` collection, or null when the request fails or the
    // response carries no `items`.
    function fetchItems(levelIds) {
        var params = { nodeType: 'cat' };
        for (var n = 0; n < levelIds.length; n++) {
            params['level' + (n + 1) + 'id'] = levelIds[n];
        }

        var items = null;
        $.ajax({
            async: false,
            type: 'GET',
            dataType: 'json',
            url: ENDPOINT,
            data: params
        }).done(function (json) {
            // The request is synchronous, so this runs before fetchItems returns.
            if (json && json['items']) {
                items = json['items'];
            }
        });
        return items;
    }

    // Builds the list of { title, id, sub } entries below the node identified by `levelIds`,
    // descending until MAX_DEPTH. Returns null when the request for this node fails.
    function collectChildren(levelIds) {
        var items = fetchItems(levelIds);
        if (items === null) {
            return null;
        }

        var depth = levelIds.length;
        var logPrefix = new Array(depth + 1).join('-'); // '-', '--' or '---'
        var children = [];
        var count = Object.keys(items).length;

        for (var n = 0; n < count; n++) {
            var item = items[n];
            var sub;
            if (depth >= MAX_DEPTH) {
                // Leaves carry an empty string rather than an empty list.
                sub = '';
            } else if (depth === FILTERED_DEPTH && (item['id'] === 'OFFER' || item['id'] === 'WANTED')) {
                sub = [];
            } else {
                // A failed child request leaves this entry with an empty child list.
                sub = collectChildren(levelIds.concat([item['id']])) || [];
            }

            console.log(logPrefix + item['value'] + ' ' + item['id']);
            children.push({
                title: item['value'],
                id: item['id'],
                sub: sub
            });
        }
        return children;
    }

    try {
        $('.p-pstctgry-lnk-ctgry').each(function () {
            // Element ids look like "<prefix>_<categoryId>"; keep the part after the underscore.
            var theId = this.id.split('_')[1];
            var theTitle = this.text;

            var subcategories = collectChildren([theId]);
            if (subcategories === null) {
                // The top-level request failed: skip this category, continue with the next.
                return;
            }

            console.log('' + theTitle + ' ' + theId);
            // Store this category with -> sub -> subsub -> subsubsub.
            all_categories.push({
                title: theTitle,
                id: theId,
                sub: subcategories
            });
            console.log(all_categories);
        });
    } finally {
        // Always signal completion, even if the scan aborted, so a poller never waits forever.
        finished = true;
    }
})();

上面第一段代码是我运行的 C# 方法,其下方的代码是纯 JavaScript,通过 Selenium 执行。

第一个问题:代码运行期间 Selenium 会被锁住,这一点我可以理解。整个过程大约需要 4 分钟,但 60 秒后就会超时并出现以下错误:

The HTTP request to the remote WebDriver server for URL timed out after 60 seconds.

这真的很烦人,而且会锁住系统。我知道一个非常快速、简单的解决办法(Thread.Sleep(300000)),但这实在太丑陋了……

我的猜测是:Selenium 正在执行一个 JavaScript 脚本并等待它完成,而我又不断地用更多 JavaScript 请求去冲击 Selenium,于是这些请求如预期那样超时了。

还有其他想法吗?

驱动程序的构造函数应该有一个带 TimeSpan 参数的重载,该参数用于指定 .NET 绑定所使用的 HTTP 客户端与远程端通信时的超时时间。将其设置为足够大的值,应该就能让操作顺利完成。