如何在 puppeteer 中等待页面

How to wait pages in puppeteer

我有大量地址需要同时打开并获取数据。 由于服务器限制,我无法同时打开超过 10 个标签页。 如何等到 serialNumbers.forEach 中出现新的免费页面? 我知道我可以检查 browser.pages () < 10。但是如何让循环在空闲选项卡的一次迭代中等待而不进入下一次迭代?

const puppeteer = require('puppeteer');
let numberOfOpenPages;

let serialNumbers = ['FVFDT2GCQ6L7', 'DLXFG3X2DFJ1', 'FVFDT2GCQ6L7', 'DLXFG3X2DFJ1', 'FVFDT2GCQ6L7', 'DLXFG3X2DFJ1'];

async function processArray() {
    const browser = await puppeteer.launch({ headless: true });


    serialNumbers.forEach(async (item, i) => {
                
// new page and then get data 
                const page = await browser.newPage();
                await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36');

                await page.goto(`https://support.apple.com/en_US/specs/${item}`);

                await page.waitForSelector('.article_link');

                await page.click('.article_link');
                await page.waitForSelector('#article');

                const specs = await page.evaluate(() => {
                    let article = document.querySelector('h1').innerHTML;
                    return `${article}`;
                });

                console.log(specs);
                await page.close();
        
// close browser when all pages is closed 
        numberOfOpenPages = (await browser.pages()).length;
        console.log(numberOfOpenPages);

        if (numberOfOpenPages <= 1) {
            await browser.close();
        }
    })
}

processArray()

也许是这样的?

const browser = await puppeteer.launch({ headless: true });

const serialNumbers = ['FVFDT2GCQ6L7', 'DLXFG3X2DFJ1', 'FVFDT2GCQ6L7', 'DLXFG3X2DFJ1', 'FVFDT2GCQ6L7', 'DLXFG3X2DFJ1'];
const pageLimit = 10;

for (let i = 0; i < pageLimit; i++) {
  processPage(await browser.newPage());
}

async function processPage(page) {
    if (serialNumbers.length === 0) {
        await page.close();
        if ((await browser.pages()).length <= 1) await browser.close();
        return;
    }

    const serialNumber = serialNumbers.shift();
    await page.goto(`https://support.apple.com/en_US/specs/${serialNumber}`);

    // Process the page...

    processPage(page);
}

我使用 setTimeOut 实现了 但有时它会同时启动比需要更多的选项卡。但是误差很小。

如果有更多正确的解法,我会很高兴知道

// const { copyFileSync } = require('fs');
const puppeteer = require('puppeteer');
let numberOfOpenPages;
let i = 0;

let serialNumbers = ['FVFDT2GCQ6L7', 'DLXFG3X2DFJ1', 'FVFDT2GCQ6L7', 'DLXFG3X2DFJ1', 'FVFDT2GCQ6L7', 'DLXFG3X2DFJ1', 'FVFDT2GCQ6L7', 'DLXFG3X2DFJ1', 'FVFDT2GCQ6L7'];

async function newPage(serial, browser) {

    numberOfOpenPages = (await browser.pages()).length;
    console.log(numberOfOpenPages);
    if (numberOfOpenPages < 10) {
        // new page and then get data 
        const page = await browser.newPage();
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36');

        await page.goto(`https://support.apple.com/en_US/specs/${serialNumbers[serial]}`);

        await page.waitForSelector('.article_link');

        await page.click('.article_link');
        await page.waitForSelector('#article');

        const specs = await page.evaluate(() => {
            let article = document.querySelector('h1').innerHTML;
            return `${article}`;
        });

        console.log(specs);
        await page.close();

        // close browser when all pages is closed 
        numberOfOpenPages = (await browser.pages()).length;
        // console.log(numberOfOpenPages);

        // if (numberOfOpenPages <= 1) {
        //     await browser.close();
        //     console.log('браузер закрыт!')
        // }
    } else {
        setTimeout(() => {
            newPage(serial, browser);
        }, 3000); // не ставить меньше 1 сек чтобы не совпадало сразу в один свободный слот 
    }
}

async function processArray() {
    const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'], headless: true });
                     //  set your counter to 1

function myLoop () {           //  create a loop function
   setTimeout(function () {    //  call a 3s setTimeout when the loop is called
      console.log('запускаем', i); 
      newPage(i, browser);         //  your code here
      i++;                   //  increment the counter
      if (i < serialNumbers.length) { //if the counter < 10, call the loop function
         myLoop();             //  ..  again which will trigger another 
      } else console.log('end loop');                       //  ..  setTimeout()
   }, 500)
}

myLoop();   
 
}

processArray()