casper.waitForSelector、超时和错误处理
casper.waitForSelector, timeout and error handling
我制作了一个快速脚本来从 Reddit 帖子列表中截取屏幕截图。该脚本从 json 文件中获取 reddit 网址(示例如下所示),然后访问每个页面以捕获屏幕截图。
该脚本适用于大多数情况。但是,如果不满足 casper.waitForSelector
条件,它会偶尔 hang/continue 到 运行。最终它会拖延整个服务器。我以为我做了足够的错误处理。我如何修改脚本以忽略任何不符合 casper.waitForSelector
条件的页面并继续进行下一个屏幕捕获项目?
JS
var casper = require('casper').create({
verbose: true,
viewportSize: {
width: 1280,
height: 720
},
logLevel: 'error',
pageSettings: {
ignoreSslErrors: true,
loadImages: true, // load images
loadPlugins: true, // do not load NPAPI plugins (Flash, Silverlight, ...)
webSecurityEnabled: false, // ajax
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
}
});
//Create random names for files
function randString(x) {
var s = "";
while (s.length < x && x > 0) {
var r = Math.random();
s += (r < 0.1 ? Math.floor(r * 100) : String.fromCharCode(Math.floor(r * 26) + (r > 0.5 ? 97 : 65)));
}
return s;
}
function getReddit(reddit, filename) {
casper.thenOpen(reddit, function() {
if (casper.exists("#thing_" + filename)) {
casper.waitForSelector("#thing_" + filename, function() {
casper.captureSelector(randString(10) + '.png', "#thing_" + filename, {
quality: 100
});
});
} else {
console.log("Reddit Error: " + filename);
}
});
}
var link = 'http://localhost/test.json';
casper.start(link);
casper.on("resource.received", function(resource) {
var results = this.evaluate(function(url) {
return __utils__.sendAJAX(url, "GET");
}, resource.url);
var x = JSON.parse(results);
//this.echo(x.length);
for (var i = 0; i < x.length; ++i) {
var reddit = x[i].post_url;
var filename = x[i].id;
getReddit(reddit, filename);
}
});
casper.run();
JSON
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4marhg/til_that_in_the_16th_century_christians_called/",
bit_id: "l6KE0vzMmgQ",
id: "t3_4marhg"
},
{
post_url: "https://www.reddit.com/r/videos/comments/4mbbab/man_ignores_museum_rules_touches_priceless_clock/",
bit_id: "2GK22rGYWKx",
id: "t3_4mbbab"
},
{
post_url: "https://www.reddit.com/r/space/comments/4mc1av/an_alien_world_67p_as_seen_by_rosetta_two_days_ago/",
id: "t3_4mc1av"
},
{
post_url: "https://www.reddit.com/r/worldnews/comments/4mc8uv/young_fish_become_hooked_on_eating_plastic_in_the/",
id: "t3_4mc8uv"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mcda3/til_that_the_giant_tortoise_did_not_receive_a/",
id: "t3_4mcda3"
},
{
post_url: "https://www.reddit.com/r/science/comments/4mcl0y/a_new_study_has_shown_that_mothers_who_are/",
id: "t3_4mcl0y"
},
{
post_url: "https://www.reddit.com/r/news/comments/4mcveg/bp_agrees_to_pay_175_million_to_settle_claims_by/",
id: "t3_4mcveg"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mdddw/til_that_when_a_british_captive_officer/",
id: "t3_4mdddw"
}
如果您向 waitForSelector
(第 3 个参数)提供 onTimeout
回调函数,那么它会在出错时执行而不是默认行为(停止脚本)。
您可以传入空函数或带有一些日志记录的函数:
casper.waitForSelector("#thing_" + filename, function _then() {
this.captureSelector(randString(10) + '.png', "#thing_" + filename, {
quality: 100
});
}, function _onTimeout(){
this.echo("#thing_" + filename + " not found", "WARNING");
});
如果您希望每个函数都有这种行为,您也可以将选项 casper.options.silentErrors
更改为 true
。
我制作了一个快速脚本来从 Reddit 帖子列表中截取屏幕截图。该脚本从 json 文件中获取 reddit 网址(示例如下所示),然后访问每个页面以捕获屏幕截图。
该脚本适用于大多数情况。但是,如果不满足 casper.waitForSelector
条件,它会偶尔 hang/continue 到 运行。最终它会拖延整个服务器。我以为我做了足够的错误处理。我如何修改脚本以忽略任何不符合 casper.waitForSelector
条件的页面并继续进行下一个屏幕捕获项目?
JS
var casper = require('casper').create({
verbose: true,
viewportSize: {
width: 1280,
height: 720
},
logLevel: 'error',
pageSettings: {
ignoreSslErrors: true,
loadImages: true, // load images
loadPlugins: true, // do not load NPAPI plugins (Flash, Silverlight, ...)
webSecurityEnabled: false, // ajax
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
}
});
//Create random names for files
function randString(x) {
var s = "";
while (s.length < x && x > 0) {
var r = Math.random();
s += (r < 0.1 ? Math.floor(r * 100) : String.fromCharCode(Math.floor(r * 26) + (r > 0.5 ? 97 : 65)));
}
return s;
}
function getReddit(reddit, filename) {
casper.thenOpen(reddit, function() {
if (casper.exists("#thing_" + filename)) {
casper.waitForSelector("#thing_" + filename, function() {
casper.captureSelector(randString(10) + '.png', "#thing_" + filename, {
quality: 100
});
});
} else {
console.log("Reddit Error: " + filename);
}
});
}
var link = 'http://localhost/test.json';
casper.start(link);
casper.on("resource.received", function(resource) {
var results = this.evaluate(function(url) {
return __utils__.sendAJAX(url, "GET");
}, resource.url);
var x = JSON.parse(results);
//this.echo(x.length);
for (var i = 0; i < x.length; ++i) {
var reddit = x[i].post_url;
var filename = x[i].id;
getReddit(reddit, filename);
}
});
casper.run();
JSON
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4marhg/til_that_in_the_16th_century_christians_called/",
bit_id: "l6KE0vzMmgQ",
id: "t3_4marhg"
},
{
post_url: "https://www.reddit.com/r/videos/comments/4mbbab/man_ignores_museum_rules_touches_priceless_clock/",
bit_id: "2GK22rGYWKx",
id: "t3_4mbbab"
},
{
post_url: "https://www.reddit.com/r/space/comments/4mc1av/an_alien_world_67p_as_seen_by_rosetta_two_days_ago/",
id: "t3_4mc1av"
},
{
post_url: "https://www.reddit.com/r/worldnews/comments/4mc8uv/young_fish_become_hooked_on_eating_plastic_in_the/",
id: "t3_4mc8uv"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mcda3/til_that_the_giant_tortoise_did_not_receive_a/",
id: "t3_4mcda3"
},
{
post_url: "https://www.reddit.com/r/science/comments/4mcl0y/a_new_study_has_shown_that_mothers_who_are/",
id: "t3_4mcl0y"
},
{
post_url: "https://www.reddit.com/r/news/comments/4mcveg/bp_agrees_to_pay_175_million_to_settle_claims_by/",
id: "t3_4mcveg"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mdddw/til_that_when_a_british_captive_officer/",
id: "t3_4mdddw"
}
如果您向 waitForSelector
(第 3 个参数)提供 onTimeout
回调函数,那么它会在出错时执行而不是默认行为(停止脚本)。
您可以传入空函数或带有一些日志记录的函数:
casper.waitForSelector("#thing_" + filename, function _then() {
this.captureSelector(randString(10) + '.png', "#thing_" + filename, {
quality: 100
});
}, function _onTimeout(){
this.echo("#thing_" + filename + " not found", "WARNING");
});
如果您希望每个函数都有这种行为,您也可以将选项 casper.options.silentErrors
更改为 true
。