CasperJS 脚本中 while 循环内的异步过程
Asynchronous Process inside a casperjs script for while loop
我正在使用 while 循环打开 csv 文件中的用户名列表。对于其中的每个用户名,我必须打开一个 URL 并将页面转储到一个文件中。
然而,casper.thenOpen总是只运行一次。我从 Asynchronous Process inside a javascript for loop 了解到这是因为它是一个异步过程。我需要对下面的代码执行相同的操作:
// Inside a CasperJS step: read usernames.csv line by line and register one thenOpen step per line.
casper.then(function(){
// NOTE: stream, targetusername and i are assigned without var, so they are implicit globals.
stream = fs.open('usernames.csv', 'r');
targetusername = stream.readLine();
i = 0;
// The loop ends as soon as readLine() yields a falsy value.
while(targetusername) {
var url = "http://blablalb" + targetusername;
console.log("current url is " + url);
// The callback reads the shared targetusername when it is invoked, not when it is registered here.
// NOTE(review): the reported output shows callbacks firing only after the loop has finished, at which
// point targetusername is already falsy — presumably why the fs.write calls below fail; confirm.
casper.thenOpen(url, function() {
console.log ("I am here");
// Writes the page title with mode 'w', then appends the page plain text with mode 'a',
// both to a path named by targetusername.
fs.write(targetusername,this.getTitle() + "\n",'w');
fs.write(targetusername,this.page.plainText,'a');
});
targetusername = stream.readLine();
// i is incremented but never read in this snippet.
i++;
}
});
casper.thenOpen 总是只运行一次,给我这个输出:
current url is first_url
current url is second_url
current url is third_url
I am here
我需要的是这样的
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
我一直没能让 while 循环正确运行!
我认为该代码没有任何问题。我写这段代码是为了测试(基本上,它和你的代码一样):
// Minimal reproduction: register one thenOpen step per URL from inside a then step.
var casper = require('casper').create();
var url_list = [
    'http://phantomjs.org/',
    'https://github.com/',
    'https://nodejs.org/'
];

casper.start();

casper.then(function () {
    // Each iteration only registers a step; the sample output shows the
    // thenOpen callbacks running after all steps have been assigned.
    url_list.forEach(function (target) {
        casper.echo('assign a then step for ' + target);
        casper.thenOpen(target, function () {
            casper.echo("current url is " + casper.getCurrentUrl());
        });
    });
});

casper.run();
输出:
assign a then step for http://phantomjs.org/
assign a then step for https://github.com/
assign a then step for https://nodejs.org/
current url is http://phantomjs.org/
current url is https://github.com/
current url is https://nodejs.org/en/
如您所见,每个 url 都被打开了一次。
让我们来回答您的问题:
Q1:为什么不是这样输出:
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
A1:因为 CasperJS 首先分配步骤,更准确地说,将步骤推入堆栈,然后从该堆栈弹出步骤,然后 运行 该步骤。请查看 great answer 了解更多信息。
Q2:为什么不输出为(为什么循环运行只有1次):
current url is first_url
current url is second_url
current url is third_url
I am here
I am here
I am here
A2:您可能遇到了某种异常情况,例如在打开第二个 url 时 PhantomJS 崩溃了。下面的代码可以帮助您了解发生了什么:
// Create the Casper instance with verbose, debug-level logging to see more logs.
var casper = require('casper').create({
    logLevel: "debug",
    verbose: true
});

// Watch the 'error' event and log the message followed by a formatted backtrace.
casper.on('error', function (msg, backtrace) {
    var lines = ['PHANTOM ERROR: ' + msg];
    if (backtrace && backtrace.length) {
        // One line per frame: " -> <file or sourceURL>: <line>[ (in function <name>)]"
        var frames = backtrace.map(function (frame) {
            var origin = frame.file || frame.sourceURL;
            var suffix = frame.function ? ' (in function ' + frame.function + ')' : '';
            return ' -> ' + origin + ': ' + frame.line + suffix;
        });
        lines = lines.concat('TRACE:', frames);
    }
    this.log(lines.join('\n'), "error");
});
我可以达到我需要的确切输出:
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
使用repeat函数,如下:
// Open one URL per username listed in usernames.csv and dump each page to a
// file named after that username. Uses casper.repeat so that each
// "current url is ..." line is immediately followed by its "I am here" line.
casper.then(function () {
    // Read every username up front so the repeat count matches the file
    // instead of a hard-coded number of iterations.
    var stream = fs.open('usernames.csv', 'r');
    var usernames = [];
    try {
        var line = stream.readLine();
        // Reading stops at the first falsy line (end of file or a blank line).
        while (line) {
            usernames.push(line);
            line = stream.readLine();
        }
    } finally {
        stream.close();
    }

    var next = 0;
    casper.repeat(usernames.length, function () {
        // Local to this iteration, so the thenOpen callback below always
        // writes to the file belonging to the URL it opened.
        var targetusername = usernames[next++];
        var url = "http://blablalb" + targetusername;
        console.log("current url is " + url);
        casper.thenOpen(url, function () {
            console.log("I am here");
            // Title first (truncating the file), then the page text appended.
            fs.write(targetusername, this.getTitle() + "\n", 'w');
            fs.write(targetusername, this.page.plainText, 'a');
        });
    });
});
我正在使用 while 循环打开 csv 文件中的用户名列表。对于其中的每个用户名,我必须打开一个 URL 并将页面转储到一个文件中。
然而,casper.thenOpen总是只运行一次。我从 Asynchronous Process inside a javascript for loop 了解到这是因为它是一个异步过程。我需要对下面的代码执行相同的操作:
// Inside a CasperJS step: read usernames.csv line by line and register one thenOpen step per line.
casper.then(function(){
// NOTE: stream, targetusername and i are assigned without var, so they are implicit globals.
stream = fs.open('usernames.csv', 'r');
targetusername = stream.readLine();
i = 0;
// The loop ends as soon as readLine() yields a falsy value.
while(targetusername) {
var url = "http://blablalb" + targetusername;
console.log("current url is " + url);
// The callback reads the shared targetusername when it is invoked, not when it is registered here.
// NOTE(review): the reported output shows callbacks firing only after the loop has finished, at which
// point targetusername is already falsy — presumably why the fs.write calls below fail; confirm.
casper.thenOpen(url, function() {
console.log ("I am here");
// Writes the page title with mode 'w', then appends the page plain text with mode 'a',
// both to a path named by targetusername.
fs.write(targetusername,this.getTitle() + "\n",'w');
fs.write(targetusername,this.page.plainText,'a');
});
targetusername = stream.readLine();
// i is incremented but never read in this snippet.
i++;
}
});
casper.thenOpen 总是只运行一次,给我这个输出:
current url is first_url
current url is second_url
current url is third_url
I am here
我需要的是这样的
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
我一直没能让 while 循环正确运行!
我认为该代码没有任何问题。我写这段代码是为了测试(基本上,它和你的代码一样):
// Minimal reproduction: register one thenOpen step per URL from inside a then step.
var casper = require('casper').create();
var url_list = [
    'http://phantomjs.org/',
    'https://github.com/',
    'https://nodejs.org/'
];

casper.start();

casper.then(function () {
    // Each iteration only registers a step; the sample output shows the
    // thenOpen callbacks running after all steps have been assigned.
    url_list.forEach(function (target) {
        casper.echo('assign a then step for ' + target);
        casper.thenOpen(target, function () {
            casper.echo("current url is " + casper.getCurrentUrl());
        });
    });
});

casper.run();
输出:
assign a then step for http://phantomjs.org/
assign a then step for https://github.com/
assign a then step for https://nodejs.org/
current url is http://phantomjs.org/
current url is https://github.com/
current url is https://nodejs.org/en/
如您所见,每个 url 都被打开了一次。
让我们来回答您的问题:
Q1:为什么不是这样输出:
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
A1:因为 CasperJS 首先分配步骤,更准确地说,将步骤推入堆栈,然后从该堆栈弹出步骤,然后 运行 该步骤。请查看 great answer 了解更多信息。
Q2:为什么不输出为(为什么循环运行只有1次):
current url is first_url
current url is second_url
current url is third_url
I am here
I am here
I am here
A2:您可能遇到了某种异常情况,例如在打开第二个 url 时 PhantomJS 崩溃了。下面的代码可以帮助您了解发生了什么:
// Create the Casper instance with verbose, debug-level logging to see more logs.
var casper = require('casper').create({
    logLevel: "debug",
    verbose: true
});

// Watch the 'error' event and log the message followed by a formatted backtrace.
casper.on('error', function (msg, backtrace) {
    var lines = ['PHANTOM ERROR: ' + msg];
    if (backtrace && backtrace.length) {
        // One line per frame: " -> <file or sourceURL>: <line>[ (in function <name>)]"
        var frames = backtrace.map(function (frame) {
            var origin = frame.file || frame.sourceURL;
            var suffix = frame.function ? ' (in function ' + frame.function + ')' : '';
            return ' -> ' + origin + ': ' + frame.line + suffix;
        });
        lines = lines.concat('TRACE:', frames);
    }
    this.log(lines.join('\n'), "error");
});
我可以达到我需要的确切输出:
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
使用repeat函数,如下:
// Open one URL per username listed in usernames.csv and dump each page to a
// file named after that username. Uses casper.repeat so that each
// "current url is ..." line is immediately followed by its "I am here" line.
casper.then(function () {
    // Read every username up front so the repeat count matches the file
    // instead of a hard-coded number of iterations.
    var stream = fs.open('usernames.csv', 'r');
    var usernames = [];
    try {
        var line = stream.readLine();
        // Reading stops at the first falsy line (end of file or a blank line).
        while (line) {
            usernames.push(line);
            line = stream.readLine();
        }
    } finally {
        stream.close();
    }

    var next = 0;
    casper.repeat(usernames.length, function () {
        // Local to this iteration, so the thenOpen callback below always
        // writes to the file belonging to the URL it opened.
        var targetusername = usernames[next++];
        var url = "http://blablalb" + targetusername;
        console.log("current url is " + url);
        casper.thenOpen(url, function () {
            console.log("I am here");
            // Title first (truncating the file), then the page text appended.
            fs.write(targetusername, this.getTitle() + "\n", 'w');
            fs.write(targetusername, this.page.plainText, 'a');
        });
    });
});