Why is puppeteer reporting "UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!"?
Why is puppeteer reporting "UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!"?
我有一个简单的 node.js 脚本来捕获一些网页的屏幕截图。看来我在使用 async/await 时被绊倒了,但我不知道在哪里。我目前正在使用 puppeteer v1.11.0.
// Script from the question: for every entry in `papers`, open the URL in a new
// page and write a PDF named after the entry's key. Kept exactly as posted.
const puppeteer = require('puppeteer');
//a list of sites to screenshot
const papers =
{
nytimes: "https://www.nytimes.com/",
wapo: "https://www.washingtonpost.com/"
};
//launch puppeteer, do everything in .then() handler
puppeteer.launch({devtools:false}).then(function(browser){
//create a load_page function that returns a promise which resolves when screenshot is taken
// `paper` is a key of `papers`; it selects the URL and is the base name of the PDF file.
async function load_page(paper){
const url = papers[paper];
// NOTE: `reject` is never called, so an error thrown inside this async executor
// does not settle the promise returned to the caller.
return new Promise(async function(resolve, reject){
const page = await browser.newPage();
await page.setViewport({width:1024, height: 768});
//screenshot on first console message
// The listener is registered with once(), i.e. for a single "console" event.
page.once("console", async console_msg => {
await page.pdf({path: paper + '.pdf',
printBackground:true,
width:'1024px',
height:'768px',
margin: {top:"0px", right:"0px", bottom:"0px", left:"0px"}
});
//close page
await page.close();
//resolve promise
// NOTE: resolve() is called from this handler, independently of the page.goto()
// call below, so the returned promise can settle (and the page can be closed)
// while that navigation is still being awaited.
resolve();
});
//go to page
await page.goto(url, {"waitUntil":["load", "networkidle0"]});
})
}
//step through the list of papers, calling the above load_page()
async function stepThru(){
for(var p in papers){
if(papers.hasOwnProperty(p)){
//wait to load page and screenshot before loading next page
await load_page(p);
}
}
//close browser after loop has finished (and all promises resolved)
await browser.close();
}
//kick it off
// NOTE: the promise returned by stepThru() is neither awaited nor given a rejection handler.
stepThru();
//getting this error message:
//UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!
});
Navigation failed because browser has disconnected
错误通常意味着启动 Puppeteer 的 Node 脚本没有等待 Puppeteer 操作完成就结束了。因此,正如您所说,这是一个与 await(等待)有关的问题。
关于您的脚本,我做了一些修改以使其工作:
- 首先你不是在等待
stepThru
函数的(异步)结束所以改变
stepThru();
至
await stepThru();
和
puppeteer.launch({devtools:false}).then(function(browser){
至
puppeteer.launch({devtools:false}).then(async function(browser){
(我加了async
)
- 我更改了您管理
goto
和 page.once
这两个 Promise 的方式
PDF 的 Promise 现在是:
// Promise that resolves after the first "console" event on `page`, once the
// awaited page.pdf() call for `<paper>.pdf` has completed.
// NOTE(review): `reject` is never called here, so a failure of page.pdf() does not settle this promise.
new Promise(async function(resolve, reject){
//screenshot on first console message
page.once("console", async () => {
await page.pdf({
path: paper + '.pdf',
printBackground:true,
width:'1024px',
height:'768px',
margin: {
top:"0px",
right:"0px",
bottom:"0px",
left:"0px"
}
});
// Signal completion only after the PDF call has finished.
resolve();
});
})
它只有一个职责,就是创建 PDF。
- 然后我用
Promise.all
管理了 page.goto
和 PDF 的 Promise
await Promise.all([
page.goto(url, {"waitUntil":["load", "networkidle2"]}),
new Promise(async function(resolve, reject){
// ... pdf creation as above
})
]);
- 我把
page.close
移到了 Promise.all
之后
await Promise.all([
// page.goto
// PDF creation
]);
await page.close();
resolve();
现在它可以工作了,这里是完整的工作脚本:
const puppeteer = require('puppeteer');

// Sites to capture; each key is also the base name of the PDF written for that site.
const papers = {
  nytimes: "https://www.nytimes.com/",
  wapo: "https://www.washingtonpost.com/"
};

//launch puppeteer, do everything in .then() handler
puppeteer.launch({devtools: false}).then(async function(browser){
  /**
   * Opens the site registered under `paper` in a new page and writes
   * `<paper>.pdf` when the page emits its first console message.
   *
   * The returned promise settles only after BOTH the navigation and the PDF
   * creation have finished, and it rejects if either of them fails. The page
   * is closed in every case.
   *
   * NOTE: a page that never emits a console message keeps this promise pending.
   *
   * @param {string} paper - key of `papers`
   * @returns {Promise<void>}
   */
  async function load_page(paper){
    const url = papers[paper];
    const page = await browser.newPage();
    try {
      await page.setViewport({width: 1024, height: 768});
      await Promise.all([
        page.goto(url, {"waitUntil": ["load", "networkidle2"]}),
        // Adapts the one-shot "console" event to a promise. The listener is
        // registered synchronously, i.e. before any navigation event can fire.
        new Promise(function(resolve, reject){
          //screenshot on first console message
          page.once("console", () => {
            page.pdf({
              path: paper + '.pdf',
              printBackground: true,
              width: '1024px',
              height: '768px',
              margin: {top: "0px", right: "0px", bottom: "0px", left: "0px"}
            }).then(resolve, reject); // forward PDF failures instead of leaving the promise pending
          });
        })
      ]);
    } finally {
      // Close the page only after navigation and PDF creation are done (or have failed).
      await page.close();
    }
  }

  /**
   * Processes the papers one at a time, then closes the browser — also when
   * one of the pages fails, so no browser process is left behind.
   *
   * @returns {Promise<void>}
   */
  async function stepThru(){
    try {
      for (const p of Object.keys(papers)) {
        //wait to load page and screenshot before loading next page
        await load_page(p);
      }
    } finally {
      await browser.close();
    }
  }

  await stepThru();
}).catch(function(err){
  // Report failures explicitly instead of producing an UnhandledPromiseRejectionWarning.
  console.error(err);
  process.exitCode = 1;
});
请注意:
我将 networkidle0
更改为 networkidle2
因为 nytimes.com 网站需要很长时间才能达到 0 个网络请求的状态(因为广告等)。您可以等待 networkidle0
显然,但这取决于您,这超出了您的问题范围(在这种情况下增加 page.goto
超时)。
www.washingtonpost.com
站点出现 TOO_MANY_REDIRECTS
错误,因此我更改为 washingtonpost.com
,但我认为您应该对此进行更多调查。为了测试脚本,我多次使用 nytimes
网站和其他网站。再说一次:这不在你的问题范围内。
如果您需要更多帮助,请告诉我
我在系统盘满的时候出现了同样的错误
我有一个简单的 node.js 脚本来捕获一些网页的屏幕截图。看来我在使用 async/await 时被绊倒了,但我不知道在哪里。我目前正在使用 puppeteer v1.11.0.
// Script from the question: for every entry in `papers`, open the URL in a new
// page and write a PDF named after the entry's key. Kept exactly as posted.
const puppeteer = require('puppeteer');
//a list of sites to screenshot
const papers =
{
nytimes: "https://www.nytimes.com/",
wapo: "https://www.washingtonpost.com/"
};
//launch puppeteer, do everything in .then() handler
puppeteer.launch({devtools:false}).then(function(browser){
//create a load_page function that returns a promise which resolves when screenshot is taken
// `paper` is a key of `papers`; it selects the URL and is the base name of the PDF file.
async function load_page(paper){
const url = papers[paper];
// NOTE: `reject` is never called, so an error thrown inside this async executor
// does not settle the promise returned to the caller.
return new Promise(async function(resolve, reject){
const page = await browser.newPage();
await page.setViewport({width:1024, height: 768});
//screenshot on first console message
// The listener is registered with once(), i.e. for a single "console" event.
page.once("console", async console_msg => {
await page.pdf({path: paper + '.pdf',
printBackground:true,
width:'1024px',
height:'768px',
margin: {top:"0px", right:"0px", bottom:"0px", left:"0px"}
});
//close page
await page.close();
//resolve promise
// NOTE: resolve() is called from this handler, independently of the page.goto()
// call below, so the returned promise can settle (and the page can be closed)
// while that navigation is still being awaited.
resolve();
});
//go to page
await page.goto(url, {"waitUntil":["load", "networkidle0"]});
})
}
//step through the list of papers, calling the above load_page()
async function stepThru(){
for(var p in papers){
if(papers.hasOwnProperty(p)){
//wait to load page and screenshot before loading next page
await load_page(p);
}
}
//close browser after loop has finished (and all promises resolved)
await browser.close();
}
//kick it off
// NOTE: the promise returned by stepThru() is neither awaited nor given a rejection handler.
stepThru();
//getting this error message:
//UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!
});
Navigation failed because browser has disconnected
错误通常意味着启动 Puppeteer 的 Node 脚本没有等待 Puppeteer 操作完成就结束了。因此,正如您所说,这是一个与 await(等待)有关的问题。
关于您的脚本,我做了一些修改以使其工作:
- 首先你不是在等待
stepThru
函数的(异步)结束所以改变
stepThru();
至
await stepThru();
和
puppeteer.launch({devtools:false}).then(function(browser){
至
puppeteer.launch({devtools:false}).then(async function(browser){
(我加了async
)
- 我更改了您管理
goto
和page.once
这两个 Promise 的方式
PDF 的 Promise 现在是:
// Promise that resolves after the first "console" event on `page`, once the
// awaited page.pdf() call for `<paper>.pdf` has completed.
// NOTE(review): `reject` is never called here, so a failure of page.pdf() does not settle this promise.
new Promise(async function(resolve, reject){
//screenshot on first console message
page.once("console", async () => {
await page.pdf({
path: paper + '.pdf',
printBackground:true,
width:'1024px',
height:'768px',
margin: {
top:"0px",
right:"0px",
bottom:"0px",
left:"0px"
}
});
// Signal completion only after the PDF call has finished.
resolve();
});
})
它只有一个职责,就是创建 PDF。
- 然后我用
Promise.all
管理了
page.goto
和 PDF 的 Promise
await Promise.all([
page.goto(url, {"waitUntil":["load", "networkidle2"]}),
new Promise(async function(resolve, reject){
// ... pdf creation as above
})
]);
- 我把
page.close
移到了Promise.all
之后
await Promise.all([
// page.goto
// PDF creation
]);
await page.close();
resolve();
现在它可以工作了,这里是完整的工作脚本:
const puppeteer = require('puppeteer');

// Sites to capture; each key is also the base name of the PDF written for that site.
const papers = {
  nytimes: "https://www.nytimes.com/",
  wapo: "https://www.washingtonpost.com/"
};

//launch puppeteer, do everything in .then() handler
puppeteer.launch({devtools: false}).then(async function(browser){
  /**
   * Opens the site registered under `paper` in a new page and writes
   * `<paper>.pdf` when the page emits its first console message.
   *
   * The returned promise settles only after BOTH the navigation and the PDF
   * creation have finished, and it rejects if either of them fails. The page
   * is closed in every case.
   *
   * NOTE: a page that never emits a console message keeps this promise pending.
   *
   * @param {string} paper - key of `papers`
   * @returns {Promise<void>}
   */
  async function load_page(paper){
    const url = papers[paper];
    const page = await browser.newPage();
    try {
      await page.setViewport({width: 1024, height: 768});
      await Promise.all([
        page.goto(url, {"waitUntil": ["load", "networkidle2"]}),
        // Adapts the one-shot "console" event to a promise. The listener is
        // registered synchronously, i.e. before any navigation event can fire.
        new Promise(function(resolve, reject){
          //screenshot on first console message
          page.once("console", () => {
            page.pdf({
              path: paper + '.pdf',
              printBackground: true,
              width: '1024px',
              height: '768px',
              margin: {top: "0px", right: "0px", bottom: "0px", left: "0px"}
            }).then(resolve, reject); // forward PDF failures instead of leaving the promise pending
          });
        })
      ]);
    } finally {
      // Close the page only after navigation and PDF creation are done (or have failed).
      await page.close();
    }
  }

  /**
   * Processes the papers one at a time, then closes the browser — also when
   * one of the pages fails, so no browser process is left behind.
   *
   * @returns {Promise<void>}
   */
  async function stepThru(){
    try {
      for (const p of Object.keys(papers)) {
        //wait to load page and screenshot before loading next page
        await load_page(p);
      }
    } finally {
      await browser.close();
    }
  }

  await stepThru();
}).catch(function(err){
  // Report failures explicitly instead of producing an UnhandledPromiseRejectionWarning.
  console.error(err);
  process.exitCode = 1;
});
请注意:
我将
networkidle0
更改为networkidle2
因为 nytimes.com 网站需要很长时间才能达到 0 个网络请求的状态(因为广告等)。您可以等待 networkidle0
显然,但这取决于您,这超出了您的问题范围(在这种情况下增加page.goto
超时)。www.washingtonpost.com
站点出现TOO_MANY_REDIRECTS
错误,因此我更改为washingtonpost.com
,但我认为您应该对此进行更多调查。为了测试脚本,我多次使用nytimes
网站和其他网站。再说一次:这不在你的问题范围内。
如果您需要更多帮助,请告诉我
我在系统盘满的时候出现了同样的错误