在 google 云函数监听器中集成 puppeteer

Integrate puppeteer inside google cloud function listener

我在 Google Cloud Functions 的 Node 8 运行时下运行云函数,并为监听器分配了 2 GB 内存。监听器:不包含 Puppeteer 操作时可以正常工作;Puppeteer:在 GET 请求中使用时可以正常工作。

但我希望 Puppeteer 能在我的监听器内部工作,而这样做会使原本正常的监听器抛出错误。

我阅读了许多文档,但没有一篇讲到如何将 Puppeteer 集成到监听器中。

编辑:我也尝试将 return 类型更改为有效格式,但无济于事。

我的可以正常工作的监听器

exports.listen = functions.firestore
    .document('/request/{id}')
    .onWrite((change, context) => {
        // Grab the data from the original function trigger 
        const document = await change.after.data()['data'];

        // Dummy return of what will eventually be given
        return change.after.ref.set(
            {
                results: [
                    { title: 'New Title', Country: 'Lala Land' },
                    { title: 'New Job', Country: 'Asgardia' }
                ]
            },
            { merge: true }
        );
    });

有效的 Puppeteer 函数

const express = require('express');
const functions = require('firebase-functions');
const puppeteer = require('puppeteer');
const app = express();
// Runs before every route. Launches headless Chrome and stores the browser
// instance on res.locals.browser for the route handler to use.
app.all('*', async (req, res, next) => {
  try {
    // Note: --no-sandbox is required in this env.
    // Could also launch chrome and reuse the instance
    // using puppeteer.connect()
    res.locals.browser = await puppeteer.launch({
      args: ['--no-sandbox']
    });
  } catch (err) {
    // A rejected promise from an async middleware is not picked up by
    // Express 4 on its own; forward the failure so the request does not hang.
    next(err);
    return;
  }
  next(); // pass control to next route.
});
// Handler to take screenshots of a URL.
// Expects ?url=<address>; responds with a full-page PNG, 400 when the url
// parameter is missing, or 500 with the error text when the capture fails.
app.get('/screenshot', async function screenshotHandler(req, res) {
  // Launched by the app.all('*') middleware for this request.
  const browser = res.locals.browser;
  try {
    const url = req.query.url;
    if (!url) {
      res.status(400).send(
        'Please provide a URL. Example: ?url=https://example.com');
      return;
    }
    const page = await browser.newPage();
    await page.goto(url, {waitUntil: 'networkidle2'});
    const buffer = await page.screenshot({fullPage: true});
    res.type('image/png').send(buffer);
  } catch (e) {
    res.status(500).send(e.toString());
  } finally {
    // Close on every path, including the 400 early return, so the Chrome
    // instance started for this request is never leaked.
    await browser.close();
  }
});

集成 Puppeteer 后无法工作的监听器(已精简为最小示例)

// Firestore trigger on writes to `/request/{id}` that tries to store the HTML
// of a page rendered by Puppeteer in the document's `results` field.
// NOTE(review): this variant fails. benchmark() is an async function, so the
// call below yields a pending Promise, and that Promise (not the HTML string)
// is what gets passed to set().
exports.listen = functions.firestore
    .document('/request/{id}')
    .onWrite((change, context) => {
        // Reads the `data` field of the post-write snapshot (not used below).
        const document = change.after.data()['data'];
        // Launches headless Chrome, loads the page and resolves with its HTML.
        // NOTE(review): the browser is never closed in this function.
        async function benchmark() {

            const browser = await puppeteer.launch({
                args: ['--no-sandbox']
            });
            const page = await browser.newPage();
            await page.goto('http://picocms.org/', {
                waitUntil: 'networkidle2'
            });
            const content = await page.content();
            return content;
        }

        return change.after.ref.set(
            {
                // results: [
                //  { title: 'New Title', Country: 'Lala Land' },
                //  { title: 'New Job', Country: 'Asgardia' }
                // ]
                // benchmark() is not awaited, so this value is a Promise.
                results: benchmark()
            },
            { merge: true }
        );
    });

日志中产生错误

Error: Value for argument "data" is not a valid Firestore document. Input is not a plain JavaScript object (found in field results).
    at Object.validateUserInput (/srv/node_modules/@google-cloud/firestore/build/src/serializer.js:312:15)
    at validateDocumentData (/srv/node_modules/@google-cloud/firestore/build/src/write-batch.js:622:26)
    at WriteBatch.set (/srv/node_modules/@google-cloud/firestore/build/src/write-batch.js:242:9)
    at DocumentReference.set (/srv/node_modules/@google-cloud/firestore/build/src/reference.js:337:27)
    at exports.listen.functions.firestore.document.onWrite (/srv/index.js:57:27)
    at cloudFunctionNewSignature (/srv/node_modules/firebase-functions/lib/cloud-functions.js:114:23)
    at /worker/worker.js:825:24
    at <anonymous>
    at process._tickDomainCallback (internal/process/next_tick.js:229:7)

官方确实应该为这种用法提供文档,不过我找到了自己的解决方案:只能在 POST 或 GET 请求中使用 Puppeteer,因为在监听器中无法访问 (request, response) 参数。

对于任何想以自己的方式使用它的人来说,这里是构建的最小可行解决方案。

注意:我将运行时升级到分配了 2G 的 Node 10(测试版)。

const functions = require('firebase-functions');
const puppeteer = require('puppeteer');
const app = require('express')();
const { db } = require('./util/admin');

// Runs before every route. Launches headless Chrome and stores the browser
// instance on res.locals.browser for the route handler to use.
app.all('*', async (req, res, next) => {
    try {
        // Note: --no-sandbox is required in this env.
        // Could also launch chrome and reuse the instance
        // using puppeteer.connect()
        res.locals.browser = await puppeteer.launch({
            args: ['--no-sandbox']
        });
    } catch (err) {
        // A rejected promise from an async middleware is not picked up by
        // Express 4 on its own; forward the failure so the request does not hang.
        next(err);
        return;
    }
    next(); // pass control to next route.
});

// POST /create: renders http://picocms.org/ in headless Chrome, stores the
// page HTML (JSON-stringified) as a new document in the `request` collection,
// and replies with the generated document id. Any failure, whether while
// scraping or while saving, is answered with a 500 and the error text.
app.post('/create', async (req, res) => {
    const request = {
        // Placeholder taken from the request body; replaced by the scraped
        // page content below.
        data: req.body.data,
        created: new Date().toISOString()
    };
    // Launched by the app.all('*') middleware for this request.
    const browser = res.locals.browser;
    try {
        try {
            const page = await browser.newPage();
            await page.goto('http://picocms.org/', {
                waitUntil: 'networkidle2'
            });
            const content = await page.content();
            request.data = JSON.stringify(content);
        } finally {
            // Release Chrome before the database write, even when a
            // Puppeteer call above has thrown.
            await browser.close();
        }
        const doc = await db.collection('request').add(request);
        res.json({
            message: `[success] ${doc.id} Generated`
        });
    } catch (e) {
        res.status(500).json({
            error: `[failed] No Request Was Saved _> ${e}`
        });
    }
});

// Export the Express app as the HTTPS-triggered function `api`.
exports.api = functions.https.onRequest(app);

您的 benchmark() 函数是 async 函数,因此调用它时返回的是一个 Promise。

因此,您正试图在此处存储一个 Promise:

        return change.after.ref.set(
            {
                results: benchmark()     // This is a promise
            },
            { merge: true }
        );

您真正想存储的是该 Promise 解析(resolve)后的值。为此,需要把 onWrite 的回调函数改为 async,并在 benchmark() 调用前加上 await。这样 results 字段得到的就是解析后的值,而不再是 Promise;它满足错误信息中要求的 "plain JavaScript object",可以被序列化并存储。