Puppeteer: how to retry a URL fetch with a delay if it failed
I'm trying to write a simple web scraper using the puppeteer library. When I fetch a page by URL via page.goto, I need to retry if the request fails, i.e. if the response code is >= 400.
My snippet:
'use strict';
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.setViewport({width: 1024, height: 768});
await page.setDefaultNavigationTimeout(0);
await page.goto('https://google.com');
await browser.close();
process.exit();
})();
If response.status() >= 400, I need to implement a retry strategy for the URL, with a delay equal to retryNumber * 1000 ms:
- 1000 ms before the first retry;
- 2000 ms before the second;
- 3000 ms before the third, and so on.
If retryNumber exceeds maxRetryNumber, the promise should be rejected.
Does anyone know how to implement this in code? Are there ready-made packages or snippets that do this?
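Conceptually I'm after something with this shape (gotoWithRetry is a made-up name here, just to illustrate the desired call):

// hypothetical: resolves with the first response whose status is < 400,
// rejects once maxRetryNumber attempts have failed
const response = await gotoWithRetry(page, 'https://google.com', maxRetryNumber);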
You can define a small promisified delay helper and then use a simple for loop to perform the retries (breaking out of the loop once the request succeeds):
'use strict';
const puppeteer = require('puppeteer');

// Promisified setTimeout, used for the growing delay between attempts.
const delay = (ms) => {
    return new Promise(resolve => setTimeout(resolve, ms));
};

(async () => {
    const browser = await puppeteer.launch({headless: false});
    const page = await browser.newPage();
    await page.setViewport({width: 1024, height: 768});
    await page.setDefaultNavigationTimeout(0);

    const maxRetryNumber = 10;
    let success = false;
    for (let retryNumber = 1; retryNumber <= maxRetryNumber; retryNumber++) {
        const response = await page.goto('https://google.com');
        // page.goto resolves with null in a few edge cases, so guard for it.
        if (response !== null && response.status() < 400) {
            success = true;
            break;
        }
        await delay(1000 * retryNumber); // 1000 ms, 2000 ms, 3000 ms, ...
    }
    if (!success) {
        // All attempts failed: throw, reject, or report here.
    }
    await browser.close();
    process.exit();
})();
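If you also want the "reject the promise" behavior from the question, the same loop can be pulled into a reusable helper. This is only a sketch, and gotoWithRetry is a made-up name rather than anything Puppeteer provides; it relies on the fact that throwing from an async function rejects the promise it returns, which gives exactly the requested semantics:

'use strict';
const puppeteer = require('puppeteer');

const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));

// Hypothetical helper: retries page.goto with a linearly growing delay and
// rejects once maxRetryNumber attempts have failed. Note that page.goto
// itself throws on network errors; those propagate out immediately here.
async function gotoWithRetry(page, url, maxRetryNumber) {
    for (let retryNumber = 1; retryNumber <= maxRetryNumber; retryNumber++) {
        const response = await page.goto(url);
        if (response !== null && response.status() < 400) {
            return response; // success: the returned promise resolves
        }
        if (retryNumber < maxRetryNumber) {
            await delay(1000 * retryNumber); // 1000 ms, 2000 ms, 3000 ms, ...
        }
    }
    // Throwing from an async function rejects the promise it returned.
    throw new Error(`failed to fetch ${url} after ${maxRetryNumber} attempts`);
}

(async () => {
    const browser = await puppeteer.launch({headless: false});
    const page = await browser.newPage();
    await page.setDefaultNavigationTimeout(0);
    try {
        const response = await gotoWithRetry(page, 'https://google.com', 10);
        console.log('OK:', response.status());
    } catch (e) {
        console.error(e.message);
    } finally {
        await browser.close();
    }
})();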
const puppeteer = require('puppeteer');

const maxRetryNumber = 10;
let retryNumber = 0;

scrape();

async function scrape() {
    retryNumber++;
    if (retryNumber > maxRetryNumber) {
        console.log('retryNumber exceeded maxRetryNumber!');
        return;
    }
    let browser;
    try {
        browser = await puppeteer.launch({headless: false});
        const page = await browser.newPage();
        await page.setViewport({width: 1024, height: 768});
        await page.setDefaultNavigationTimeout(0);
        // Delay grows with the attempt number: 1000 ms, 2000 ms, 3000 ms, ...
        // (page.waitFor is deprecated in recent Puppeteer, so plain setTimeout is used.)
        await new Promise(resolve => setTimeout(resolve, retryNumber * 1000));
        const response = await page.goto('https://google.com');
        await browser.close();
        if (response.status() >= 400) {
            scrape(); // HTTP error status: retry
        } else {
            console.log('ALL OK');
        }
    } catch (e) {
        if (browser) {
            await browser.close(); // don't leak the browser on navigation errors
        }
        scrape(); // network/navigation error: retry as well
    }
}
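Note the design tradeoff between the two answers: the recursive version launches a fresh browser for every attempt, which is more robust against a wedged page but much slower than reusing one page in a loop. Neither snippet rejects a promise when the retries run out; if you need that, throw from the retry function as sketched above. As for ready-made packages, general-purpose retry helpers exist on npm (p-retry and async-retry, for example); they handle the attempt counting and backoff for you, but you would still have to throw on response.status() >= 400 yourself so they treat an HTTP error as a failure.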