使用 Puppeteer 为 URL 列表逐个截取屏幕截图

Take screenshots for a list of URLs using Puppeteer

假设我有 Urls 列表或 SVG 文件的完整路径,现在我想为每个 Urls 逐一截屏。

这里是测试代码,我是用来一张一张截图的,但是没有达到预期的效果!

这段代码对所有 URL 只打开了一个无头 Chrome 页面实例,并且 Node.js 抛出了如下警告:

(node:3412) MaxListenersExceededWarning: Possible EventEmitter memory leak detected. 11 lifecycleevent listeners added. Use emitter.setMaxListeners() to increase limit

不过我想一张一张的截图

'use strict';

const fs        = require('fs');
const glob      = require('glob');
const validUrl  = require("valid-url")
const puppeteer = require('puppeteer');
const devices   = require('puppeteer/DeviceDescriptors');
const iPhone    = devices['iPhone 6'];

/**
 * Loading Application Config
 *
 * NOTE(review): no code in this listing reads this module-level constant; the
 * callback passed to newPage() below declares its own `inputUrl`, which
 * shadows this one.
 */
const inputUrl = 'http://www.google.com';

/**
 * Using Puppeteer.js
 */
// Launches headless Chrome, opens a single page, resolves the list of source
// files and requests a screenshot for each of them on that one page.
//
// NOTE(review): `browser` is bound to the promise returned by `.then(...)`,
// not to the browser object itself; the browser is only reachable as
// `browserObj` inside the callback.
console.log('-- Trying to Launch Puppeteer');
const browser = puppeteer.launch({
    headless: true
}).then(function(browserObj)
{
    console.log('-- Trying to Open New Page');
    // NOTE(review): the promise from newPage().then(...) is neither returned
    // nor awaited, so the outer chain does not wait for the work below.
    browserObj.newPage().then(function(pageObj)
    {
        /**
         * Logs a notice and closes the given browser.
         * @param {object} browserObj - Browser whose `close()` is awaited.
         */
        async function closeHeadlesssChrome(browserObj) {
            console.log('-- Trying to Close Chome Headless Window');
            await browserObj.close();
        }

        /**
         * Logs a notice and sets a fixed 1366x738 non-mobile viewport on the page.
         * @param {object} pageObj - Page whose `setViewport()` is awaited.
         */
        async function setChromeViewport(pageObj) {
            console.log('-- Trying to Update page viewPort');
            await pageObj.setViewport({
                width: 1366,
                height: 738,
                deviceScaleFactor: 1,
                isMobile: false,
                hasTouch: false,
                isLandscape: false
            });
        }

        /**
         * Navigates the page to `srcUrl` and then saves a clipped screenshot.
         * @param {object} pageObj - Page used for `goto()` and `screenshot()`.
         * @param {string} srcUrl - Target passed to `goto()`; also the base of the output file name.
         */
        var takeScreenshot = async function(pageObj, srcUrl) {
            console.log('-- Trying to Load Web Page ' + srcUrl);
            await pageObj.goto(srcUrl);

            console.log('-- Trying to Take Screenshot');
            await pageObj.screenshot({
                // The image is written next to the source, as "<srcUrl>.png".
                path: srcUrl + '.png',
                // Only a 795x1125 region anchored at the top-left corner is captured.
                clip: {
                    x: 0,
                    y: 0,
                    width: 795,
                    height: 1125
                }
            })
        }


        // Input or Source Url (shadows the module-level `inputUrl` constant)
        const inputUrl = "C:/Users/ssp/Music/BR PUBLIC INTER COLLEGE";
        var matchedFiles = [];

        // Check if given Url/Path exists
        if (fs.existsSync(inputUrl)) 
        {
            const inputUrlObj = fs.statSync(inputUrl);
            if (inputUrlObj.isDirectory()) 
            {
                // Directory: search it recursively for *.svg files.
                // NOTE(review): `.found` is presumably the array of matched paths - confirm against the installed glob version.
                matchedFiles = glob.GlobSync(inputUrl + '/**/*.svg').found;
            } 
            else if (inputUrlObj.isFile()) 
            {
                // Single file: it is the only entry to process.
                matchedFiles.push(inputUrl );
            }
        } 
        else 
        {
            // This is the only path on which the browser gets closed.
            console.log('-- Input Url not exists')
            return closeHeadlesssChrome(browserObj);
        }

        // NOTE(review): not awaited, so the screenshots below may start before
        // the viewport change has completed.
        setChromeViewport(pageObj);

        // NOTE(review): `.map` invokes takeScreenshot for every entry without
        // waiting for the previous call, so all navigations are started on the
        // same page at once. This is likely what triggers the
        // MaxListenersExceededWarning. The browser is also never closed on this
        // path.
        matchedFiles.map(function(srcUrl){
            takeScreenshot(pageObj, srcUrl);
        });
    });
});

谢谢

请使用 for..of 配合 async/await,而不是 .map。.map 不会暂停执行,而循环中的 await 会等待每一次截图完成后再继续。

browserObj.newPage().then(async function(pageObj) { // <-- turn the main function into async function
 // ... many lines later
 for(let srcUrl of matchedFiles){
  await takeScreenshot(pageObj, srcUrl);
 }

旁注:由于这些函数彼此独立,也许可以把它们移到回调块之外,在调用 browserObj.newPage() 之前声明。

下面是重构后的代码,如有不妥请见谅,但大致思路就是这样。

"use strict";

const fs = require("fs");
const { pathToFileURL } = require("url");
const glob = require("glob");
const validUrl = require("valid-url");
const puppeteer = require("puppeteer");
const devices = require("puppeteer/DeviceDescriptors");
const iPhone = devices["iPhone 6"];

/**
 * Loading Application Config
 *
 * NOTE(review): no code in this listing reads this module-level constant.
 */
const inputUrl = "http://www.google.com";

/**
 * Controller functions
 */

/**
 * Announces the shutdown on the console, then closes the given browser.
 *
 * @param {object} browserObj - Browser handle; only its `close()` method is used.
 * @returns {Promise<void>} Settles once `close()` has settled.
 */
async function closeHeadlesssChrome(browserObj) {
  const notice = "-- Trying to Close Chome Headless Window";
  console.log(notice);

  await browserObj.close();
}

/**
 * Announces the change on the console, then applies a fixed 1366x738,
 * non-mobile, non-touch, portrait viewport to the given page.
 *
 * @param {object} pageObj - Page handle; only its `setViewport()` method is used.
 * @returns {Promise<void>} Settles once `setViewport()` has settled.
 */
async function setChromeViewport(pageObj) {
  const desktopViewport = {
    width: 1366,
    height: 738,
    deviceScaleFactor: 1,
    isMobile: false,
    hasTouch: false,
    isLandscape: false,
  };

  console.log("-- Trying to Update page viewPort");
  await pageObj.setViewport(desktopViewport);
}

/**
 * Loads one source into the given page and saves a clipped PNG screenshot of it.
 *
 * `srcUrl` may be a URL that already carries a scheme (http:, https:, file:,
 * data:, ...) or a local filesystem path. Local paths are converted to a
 * `file://` URL before navigating, because `page.goto()` expects a URL that
 * includes its scheme; a bare path such as "C:/dir/a.svg" is not a valid
 * navigation target.
 *
 * The image is written to "<srcUrl>.png", i.e. next to the source file when
 * `srcUrl` is a local path.
 *
 * @param {object} pageObj - Page handle; only `goto()` and `screenshot()` are used.
 * @param {string} srcUrl - URL or local file path to capture.
 * @returns {Promise<void>} Settles once the screenshot call has settled; rejects
 *   if navigation or the screenshot rejects.
 */
async function takeScreenshot(pageObj, srcUrl) {
  // A scheme of two or more characters marks a real URL. A single letter
  // followed by ":" is a Windows drive prefix ("C:/...") and is treated as a path.
  const hasScheme = /^[a-z][a-z0-9+.-]+:/i.test(srcUrl);
  const targetUrl = hasScheme ? srcUrl : pathToFileURL(srcUrl).href;

  console.log("-- Trying to Load Web Page " + srcUrl);
  await pageObj.goto(targetUrl);

  console.log("-- Trying to Take Screenshot");
  await pageObj.screenshot({
    path: `${srcUrl}.png`,
    // Capture only a 795x1125 region anchored at the top-left corner.
    clip: {
      x: 0,
      y: 0,
      width: 795,
      height: 1125,
    },
  });
}

/**
 * Collects the local files that should be screenshotted.
 *
 * - If `inputPath` is a directory, it is searched recursively for `*.svg` files.
 * - If `inputPath` is a regular file, the result is a one-element list with that path.
 * - If `inputPath` exists but is neither, the result is an empty list.
 * - If `inputPath` does not exist, `undefined` is returned so the caller can
 *   tell "nothing to look at" apart from "looked, found nothing".
 *
 * @param {string} [inputPath] - File or directory to inspect. Defaults to the
 *   sample directory this script was originally written for.
 * @returns {string[]|undefined} Paths to process, or `undefined` when
 *   `inputPath` does not exist.
 */
function getURLList(inputPath = "C:/Users/ssp/Music/BR PUBLIC INTER COLLEGE") {
  // Check if given Url/Path exists
  if (!fs.existsSync(inputPath)) {
    return undefined;
  }

  const inputStats = fs.statSync(inputPath);
  if (inputStats.isDirectory()) {
    // NOTE(review): `.found` is presumably the array of matched paths - confirm
    // against the installed glob version.
    return glob.GlobSync(`${inputPath}/**/*.svg`).found;
  }

  return inputStats.isFile() ? [inputPath] : [];
}

/**
 * Using Puppeteer.js
 */

/**
 * Entry point: resolves the list of sources, launches one headless browser
 * with one page, and screenshots the sources strictly one after another.
 *
 * The browser is closed in a `finally` block so the Chromium process is
 * released whether the run succeeds or fails; without that the browser is left
 * running after the last screenshot. Any failure is logged and reported
 * through a non-zero exit code instead of surfacing as an unhandled rejection.
 */
(async () => {
  // get url list
  const matchedFiles = getURLList();
  if (!matchedFiles) {
    console.log("-- Input Url not exists");
    // if there is no url, then no need to even launch the browser and waste resources
    return;
  }

  console.log("-- Trying to Launch Puppeteer");
  const browserObj = await puppeteer.launch({
    headless: true,
  });

  try {
    console.log('-- Trying to Open New Page');
    const pageObj = await browserObj.newPage();

    console.log('-- Change Viewport');
    await setChromeViewport(pageObj);

    console.log('-- Run thru the url list');
    // Sequential on purpose: a single page can only perform one navigation at a time.
    for (const srcUrl of matchedFiles) {
      await takeScreenshot(pageObj, srcUrl);
    }
  } finally {
    await closeHeadlesssChrome(browserObj);
  }
})().catch((err) => {
  console.error("-- Screenshot run failed:", err);
  process.exitCode = 1;
});