如何使用 Puppeteer 打印 HTML 文档?

How to print an HTML document using Puppeteer?

最近我开始使用 Puppeteer 抓取网页。以下是从购物网站中提取特定商品名称的代码。

const puppeteer = require('puppeteer');

/**
 * Opens the Naver Shopping home page in a visible browser window, types the
 * search term into the `.co_srh_input` box, submits it with Enter, and logs
 * the text of up to 10 elements matching `div.info > a.tit`.
 *
 * The browser is always closed, even when a step fails, and any failure is
 * reported on stderr with a non-zero exit code.
 */
(async () => {
    // Fixed-length pause. A plain timer is used instead of page.waitFor(ms),
    // which is deprecated in newer Puppeteer releases.
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    const width = 1600;
    const height = 1040;

    // NOTE(review): Puppeteer documents `slowMo` as a delay in milliseconds;
    // `true` is kept as originally written — confirm the intended value.
    const option = { headless: false, slowMo: true, args: [`--window-size=${width},${height}`] };

    const browser = await puppeteer.launch(option);
    try {
        const page = await browser.newPage();
        await page.setViewport({ width, height });

        // page.goto() itself resolves once the navigation has finished, so no
        // separately created waitForNavigation() promise is needed. A dangling
        // one would also become an unhandled rejection if goto() failed.
        await page.goto('https://shopping.naver.com/home/p/index.nhn');
        await sleep(2000);

        const textBoxId = 'co_srh_input';
        await page.type(`.${textBoxId}`, '양말', { delay: 100 });
        await page.keyboard.press('Enter');

        // Give the search a moment to start before looking for result links.
        await sleep(5000);
        await page.waitForSelector('div.info > a.tit');

        const stores = await page.evaluate(() => {
            const links = Array.from(document.querySelectorAll('div.info > a.tit'));
            return links.map((link) => link.innerText).slice(0, 10); // keep only the first 10 products
        });

        console.log(stores);
    } finally {
        // Release the browser process whether or not scraping succeeded.
        await browser.close();
    }
})().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});

我有一个问题。如何将爬取结果输出到HTML文档(不使用数据库)?请使用示例代码进行解释。

fs.writeFile()

您可以使用下面的 write_file 函数,它返回一个 Promise:当 fs.writeFile() 成功时该 Promise 会 resolve,失败时则会 reject。

然后,您可以在匿名异步函数中 await 该 Promise,以检查数据是否已写入文件:

'use strict';

const fs = require('fs');
const puppeteer = require('puppeteer');

/**
 * Promise wrapper around the callback-based fs.writeFile(), writing `data`
 * to `file` with the 'utf8' encoding.
 *
 * @param {*} file - Destination, passed unchanged to fs.writeFile().
 * @param {*} data - Content to write, passed unchanged to fs.writeFile().
 * @returns {Promise<boolean>} Resolves with `true` once the write succeeds;
 *   rejects with the Error reported by fs.writeFile() when it fails.
 */
const write_file = (file, data) => new Promise((resolve, reject) => {
  fs.writeFile(file, data, 'utf8', error => {
    if (error) {
      console.error(error);
      // Reject with the real Error rather than a bare `false`, so callers can
      // see why the write failed (error.code, error.message, stack).
      reject(error);
      return;
    }
    resolve(true);
  });
});

/**
 * Excerpt of the scraping script: collects the text of up to 10 elements
 * matching `div.info > a.tit` and writes them to example.html.
 *
 * The function is invoked immediately; the `// ...` markers stand for the
 * browser/page setup and teardown that are elided here.
 */
(async () => {

  // ...

  const stores = await page.evaluate(() => {
    return Array.from(document.querySelectorAll('div.info > a.tit'), link => link.innerText).slice(0, 10); // keep only the first 10 products
  });

  try {
    // Array.prototype.toString() joins the names with commas.
    await write_file('example.html', stores.toString());
  } catch (error) {
    // write_file() reports failure by rejecting, so `await` throws here;
    // comparing the awaited value with `false` would never match.
    // The underlying error has already been logged by write_file().
    console.error('Error: Unable to write stores to example.html.');
  }

  // ...

})().catch(error => {
  console.error(error);
  process.exitCode = 1;
});

我使用了在 blog.kowalczyk.info 上看到的内容:
const puppeteer = require("puppeteer");
const fs = require("fs");

/**
 * Launches a browser with default options, loads https://www.google.com/,
 * and saves the serialized page (page.content()) to index.html.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed;
 *   rejects if launching, navigation, or the file write fails.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://www.google.com/", { waitUntil: "networkidle2" });
    // hacky defensive move but I don't know a better way:
    // wait a bit so that the browser finishes executing JavaScript.
    // A plain timer is used instead of page.waitFor(ms), which is deprecated
    // in newer Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, 1 * 1000));
    const html = await page.content();
    fs.writeFileSync("index.html", html);
  } finally {
    // Close the browser even when navigation or the write throws.
    await browser.close();
  }
}

// Report failures instead of leaving the returned promise unhandled.
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});