PhantomJS 在 150-180 个 url 后崩溃
PhantomJS crashes after 150-180 urls
到目前为止,我的脚本工作正常:按顺序逐行加载文本文件中的每个页面(page.open 是异步的,而页面对象是全局的,会被新请求覆盖——同时运行多个 page.open() 会造成严重混乱),匹配特定域名的每个请求并从中打印 JSON 值。
但是,如果我使用包含超过 150 个链接的 .txt 文件,它每次都会崩溃,大多数情况下没有错误消息,也没有像这样的崩溃转储:
PhantomJS has crashed. Please read the crash reporting guide at
http://phantomjs.org/crash-reporting.html and file a bug report at
https://github.com/ariya/phantomjs/issues/new.
Unfortunately, no crash dump is available.
(Is %TEMP% (C:\Users\XXX\AppData\Local\Temp) a directory you cannot write?)
如果我多次运行,我可以很容易地重现它,无论是一次性运行还是一个接一个地运行。
我怎样才能防止崩溃?如果 Phantom 无法处理,我的脚本就没用了。
但有时我会得到一个崩溃转储:
PhantomJS has crashed. Please read the crash reporting guide at
http://phantomjs.org/crash-reporting.html and file a bug report at
https://github.com/ariya/phantomjs/issues/new.
Please attach the crash dump file:
C:\Users\XXX\AppData\Local\Temp\a4fd6af6-1244-44d3-8938-3aabe298c2fa.dmp
在极少数情况下,我甚至会收到一条错误消息;Process Explorer 显示该进程一次最多有 21 个线程
QThread::start: Failed to create thread ()
// PhantomJS scraper: reads URLs line-by-line from a sitemap file and
// inspects each page's outgoing requests.
console.log('Hello, world!');
var fs = require('fs');
// Stream over the URL list; one URL per line.
var stream = fs.open('500sitemap.txt', 'r');
var webPage = require('webpage');
// 1-based counter of processed URLs (used only for log output).
var i = 1;
// String-keyed lookup tables keyed by URL. Plain objects are the right
// structure here; Array() keyed by strings only created hidden expando
// properties and never used the array part.
var hasFound = {};
var hasonLoadFinished = {};
// Loads one URL in a fresh page, watches its outgoing resource requests for
// the target domain, extracts and prints the embedded JSON payload, then
// closes the page and advances to the next URL.
// @param {string} line - the URL to load (also used as the key into
//   hasFound / hasonLoadFinished).
function handle_page(line) {
    var page = webPage.create();
    page.settings.loadImages = false;
    // Attach the handlers BEFORE calling open(); handlers installed after
    // open() can miss the page's very first resource requests.
    page.onResourceRequested = function(requestData, request) {
        // Escape the dot: the old /example.de\/ac/ would also match e.g.
        // "exampleXde/ac". No /g flag needed for a plain hit test.
        var match = requestData.url.match(/example\.de\/ac/);
        if (match !== null) {
            hasFound[line] = true;
            var targetString = decodeURI(JSON.stringify(requestData.url));
            // Cut out the first {...} span embedded in the request URL.
            var klammerauf = targetString.indexOf("{");
            var jsonobjekt = targetString.substr(klammerauf, (targetString.indexOf("}") - klammerauf) + 1);
            // Declared with var: was an accidental implicit global before.
            var targetJSON = decodeURIComponent(jsonobjekt);
            var t = JSON.parse(targetJSON);
            console.log(i + " " + t + " " + t['id']);
            // abort is a method; the bare `request.abort;` was a no-op, so
            // the matched request was never actually cancelled.
            request.abort();
        }
    };
    page.onLoadFinished = function(status) {
        // onLoadFinished can fire more than once per page (redirects,
        // frames); the guard makes each URL finish exactly once.
        if (!hasonLoadFinished[line]) {
            hasonLoadFinished[line] = true;
            if (!hasFound[line]) {
                console.log(i + " :NOT FOUND: " + line);
                console.log("");
            }
            i++;
            // Close the page shortly after finishing to release its memory
            // (unclosed pages are the main reason PhantomJS dies after
            // ~150 URLs). Wrapped in a closure so close() keeps its `this`;
            // passing the unbound `page.close` is unreliable.
            setTimeout(function() { page.close(); }, 200);
            nextPage();
        }
    };
    page.open(line, function() {});
}
// Pulls the next URL from the stream and processes it; prints the elapsed
// time and exits when the file is exhausted.
function nextPage() {
    var line = stream.readLine();
    if (!line) {
        // `var`: was an accidental implicit global before.
        var end = Date.now();
        console.log("");
        console.log(((end - start) / 1000) + " Sekunden");
        phantom.exit(0);
        // phantom.exit() does not stop execution synchronously; without this
        // return the code below ran once more with line === false.
        return;
    }
    hasFound[line] = false;
    hasonLoadFinished[line] = false;
    handle_page(line);
}
// Start timestamp for the summary printed on exit. Declared with var
// (script scope) instead of leaking an implicit global.
var start = Date.now();
nextPage();
/edit:使用 1.9.8 时在 3836 个链接后崩溃……回到原点……
看来是 2.0 版本的问题。出于无奈测试了 1.9.8——它有效:使用的 RAM 减少了 60%,跑完 1000 个 URL 也没有崩溃。
github 的崩溃报告已完成,真是令人欣慰,它有效。
到目前为止,我的脚本工作正常:按顺序逐行加载文本文件中的每个页面(page.open 是异步的,而页面对象是全局的,会被新请求覆盖——同时运行多个 page.open() 会造成严重混乱),匹配特定域名的每个请求并从中打印 JSON 值。
但是,如果我使用包含超过 150 个链接的 .txt 文件,它每次都会崩溃,大多数情况下没有错误消息,也没有像这样的崩溃转储:
PhantomJS has crashed. Please read the crash reporting guide at http://phantomjs.org/crash-reporting.html and file a bug report at https://github.com/ariya/phantomjs/issues/new. Unfortunately, no crash dump is available. (Is %TEMP% (C:\Users\XXX\AppData\Local\Temp) a directory you cannot write?)
如果我多次运行,我可以很容易地重现它,无论是一次性运行还是一个接一个地运行。我怎样才能防止崩溃?如果 Phantom 无法处理,我的脚本就没用了。
但有时我会得到一个崩溃转储:
PhantomJS has crashed. Please read the crash reporting guide at http://phantomjs.org/crash-reporting.html and file a bug report at https://github.com/ariya/phantomjs/issues/new. Please attach the crash dump file: C:\Users\XXX\AppData\Local\Temp\a4fd6af6-1244-44d3-8938-3aabe298c2fa.dmp
在极少数情况下,我甚至会收到一条错误消息;Process Explorer 显示该进程一次最多有 21 个线程
QThread::start: Failed to create thread ()
// PhantomJS scraper: reads URLs line-by-line from a sitemap file and
// inspects each page's outgoing requests.
console.log('Hello, world!');
var fs = require('fs');
// Stream over the URL list; one URL per line.
var stream = fs.open('500sitemap.txt', 'r');
var webPage = require('webpage');
// 1-based counter of processed URLs (used only for log output).
var i = 1;
// String-keyed lookup tables keyed by URL. Plain objects are the right
// structure here; Array() keyed by strings only created hidden expando
// properties and never used the array part.
var hasFound = {};
var hasonLoadFinished = {};
// Loads one URL in a fresh page, watches its outgoing resource requests for
// the target domain, extracts and prints the embedded JSON payload, then
// closes the page and advances to the next URL.
// @param {string} line - the URL to load (also used as the key into
//   hasFound / hasonLoadFinished).
function handle_page(line) {
    var page = webPage.create();
    page.settings.loadImages = false;
    // Attach the handlers BEFORE calling open(); handlers installed after
    // open() can miss the page's very first resource requests.
    page.onResourceRequested = function(requestData, request) {
        // Escape the dot: the old /example.de\/ac/ would also match e.g.
        // "exampleXde/ac". No /g flag needed for a plain hit test.
        var match = requestData.url.match(/example\.de\/ac/);
        if (match !== null) {
            hasFound[line] = true;
            var targetString = decodeURI(JSON.stringify(requestData.url));
            // Cut out the first {...} span embedded in the request URL.
            var klammerauf = targetString.indexOf("{");
            var jsonobjekt = targetString.substr(klammerauf, (targetString.indexOf("}") - klammerauf) + 1);
            // Declared with var: was an accidental implicit global before.
            var targetJSON = decodeURIComponent(jsonobjekt);
            var t = JSON.parse(targetJSON);
            console.log(i + " " + t + " " + t['id']);
            // abort is a method; the bare `request.abort;` was a no-op, so
            // the matched request was never actually cancelled.
            request.abort();
        }
    };
    page.onLoadFinished = function(status) {
        // onLoadFinished can fire more than once per page (redirects,
        // frames); the guard makes each URL finish exactly once.
        if (!hasonLoadFinished[line]) {
            hasonLoadFinished[line] = true;
            if (!hasFound[line]) {
                console.log(i + " :NOT FOUND: " + line);
                console.log("");
            }
            i++;
            // Close the page shortly after finishing to release its memory
            // (unclosed pages are the main reason PhantomJS dies after
            // ~150 URLs). Wrapped in a closure so close() keeps its `this`;
            // passing the unbound `page.close` is unreliable.
            setTimeout(function() { page.close(); }, 200);
            nextPage();
        }
    };
    page.open(line, function() {});
}
// Pulls the next URL from the stream and processes it; prints the elapsed
// time and exits when the file is exhausted.
function nextPage() {
    var line = stream.readLine();
    if (!line) {
        // `var`: was an accidental implicit global before.
        var end = Date.now();
        console.log("");
        console.log(((end - start) / 1000) + " Sekunden");
        phantom.exit(0);
        // phantom.exit() does not stop execution synchronously; without this
        // return the code below ran once more with line === false.
        return;
    }
    hasFound[line] = false;
    hasonLoadFinished[line] = false;
    handle_page(line);
}
// Start timestamp for the summary printed on exit. Declared with var
// (script scope) instead of leaking an implicit global.
var start = Date.now();
nextPage();
/edit:使用 1.9.8 时在 3836 个链接后崩溃……回到原点……
看来是 2.0 版本的问题。出于无奈测试了 1.9.8——它有效:使用的 RAM 减少了 60%,跑完 1000 个 URL 也没有崩溃。
github 的崩溃报告已完成,真是令人欣慰,它有效。