【问题标题】:Integrate puppeteer inside google cloud function listener在谷歌云函数监听器中集成 puppeteer
【发布时间】:2019-08-31 22:20:45
【问题描述】:

我在 Node 8 运行时上运行 Google Cloud Functions,并为侦听器分配了 2 GB 内存。侦听器:不使用 puppeteer 时可以正常工作;Puppeteer:在 GET 请求中使用时可以正常工作。

但我希望 puppeteer 在我的侦听器内部工作,这会使我已经有效的侦听器抛出错误。

我已经阅读了许多文档,但没有一个将 puppeteer 集成到侦听器中。

编辑:我也尝试将返回类型更改为有效格式,但无济于事。

我的可正常工作的侦听器

exports.listen = functions.firestore
    .document('/request/{id}')
    .onWrite((change, context) => {
        // Grab the data from the original function trigger 
        const document = await change.after.data()['data'];

        // Dummy return of what will eventually be given
        return change.after.ref.set(
            {
                results: [
                    { title: 'New Title', Country: 'Lala Land' },
                    { title: 'New Job', Country: 'Asgardia' }
                ]
            },
            { merge: true }
        );
    });

有效的 Puppeteer 函数

const express = require('express');
const functions = require('firebase-functions');
const puppeteer = require('puppeteer');
const app = express();
// Runs before every route. Launches headless Chrome and stores the browser
// on res.locals.browser for the route handler that follows.
app.all('*', async (req, res, next) => {
  try {
    // Note: --no-sandbox is required in this env.
    // Could also launch chrome and reuse the instance
    // using puppeteer.connect()
    res.locals.browser = await puppeteer.launch({
      args: ['--no-sandbox']
    });
  } catch (err) {
    // A rejection inside an async handler is not guaranteed to reach
    // Express's error handling (Express 4 ignores it), which would leave the
    // request unanswered. Forward the failure explicitly.
    return next(err);
  }
  next(); // pass control to next route.
});
// Handler to take screenshots of a URL.
// Expects ?url=<page>; responds with a full-page PNG, 400 when the url query
// parameter is missing, or 500 with the error text when rendering fails.
app.get('/screenshot', async function screenshotHandler(req, res) {
  // Launched by the app.all('*') middleware for this request.
  const browser = res.locals.browser;
  try {
    const url = req.query.url;
    if (!url) {
      return res.status(400).send(
        'Please provide a URL. Example: ?url=https://example.com');
    }
    const page = await browser.newPage();
    await page.goto(url, {waitUntil: 'networkidle2'});
    const buffer = await page.screenshot({fullPage: true});
    res.type('image/png').send(buffer);
  } catch (e) {
    res.status(500).send(e.toString());
  } finally {
    // Always release the Chrome instance, including on the 400 early return,
    // which previously skipped the close and leaked the browser.
    await browser.close();
  }
});

加入 puppeteer 后无法工作的侦听器(已精简为最小示例)

// Firestore trigger on writes to /request/{id} that tries to store the HTML
// of a page fetched with puppeteer in the document's `results` field.
// NOTE: this is the failing version. benchmark() is declared async, so the
// call below yields a Promise, and that Promise object (not the page content)
// is what gets passed to set().
exports.listen = functions.firestore
    .document('/request/{id}')
    .onWrite((change, context) => {
        // Read but not used further in this callback.
        const document = change.after.data()['data'];
        // Launches headless Chrome, loads the page and resolves with its HTML.
        // The browser is never closed in this function.
        async function benchmark() {

            const browser = await puppeteer.launch({
                args: ['--no-sandbox']
            });
            const page = await browser.newPage();
            await page.goto('http://picocms.org/', {
                waitUntil: 'networkidle2'
            });
            const content = await page.content();
            return content;
        }

        return change.after.ref.set(
            {
                // results: [
                //  { title: 'New Title', Country: 'Lala Land' },
                //  { title: 'New Job', Country: 'Asgardia' }
                // ]
                // benchmark() is not awaited here, so `results` receives a
                // pending Promise rather than the resolved string.
                results: benchmark()
            },
            { merge: true }
        );
    });

日志中产生错误

Error: Value for argument "data" is not a valid Firestore document. Input is not a plain JavaScript object (found in field results).
    at Object.validateUserInput (/srv/node_modules/@google-cloud/firestore/build/src/serializer.js:312:15)
    at validateDocumentData (/srv/node_modules/@google-cloud/firestore/build/src/write-batch.js:622:26)
    at WriteBatch.set (/srv/node_modules/@google-cloud/firestore/build/src/write-batch.js:242:9)
    at DocumentReference.set (/srv/node_modules/@google-cloud/firestore/build/src/reference.js:337:27)
    at exports.listen.functions.firestore.document.onWrite (/srv/index.js:57:27)
    at cloudFunctionNewSignature (/srv/node_modules/firebase-functions/lib/cloud-functions.js:114:23)
    at /worker/worker.js:825:24
    at <anonymous>
    at process._tickDomainCallback (internal/process/next_tick.js:229:7)

【问题讨论】:

  • 如果有人自己写过集成了 puppeteer 的最小侦听器,欢迎分享可用的代码片段(snippet)。
  • 您的错误消息似乎与 puppeteer 没有任何关系。我会说你对这条线有问题:const document = change.after.data()['data'];
  • 问题是,它可以在没有 puppeteer 代码的情况下正常工作,我已经对其进行了没有错误的测试,并且它给出了正确的输出。

标签: javascript node.js google-cloud-functions puppeteer


【解决方案1】:

您的 benchmark() 函数是 async,因此在调用时会返回一个 Promise。

因此,您尝试在此处存储 Promise:

        return change.after.ref.set(
            {
                results: benchmark()     // benchmark() is async, so this is a pending Promise, not the resolved value
            },
            { merge: true }
        );

您希望存储来自 Promise 的已解析值。您必须将您的函数更改为async,然后在您的benchmark() 调用前添加一个await。然后你的数据就会变成一个“普通的 JavaScript 对象”,并且可以被序列化和存储。

【讨论】:

    【解决方案2】:

    他们确实应该为这种用法提供文档,不过我找到了自己的解决方案:我只能在 POST 或 GET 请求的处理函数中使用 puppeteer,因为(在侦听器中)无法访问 (request, response) 参数。

    对于任何想以自己的方式使用它的人来说,这里是一个最小可行的解决方案。

    注意:我将运行时升级到分配了 2G 的 Node 10(测试版)。

    const functions = require('firebase-functions');
    const puppeteer = require('puppeteer');
    const app = require('express')();
    const { db } = require('./util/admin');
    
    // Runs before every route. Launches headless Chrome and stores the
    // browser on res.locals.browser for the route handler that follows.
    app.all('*', async (req, res, next) => {
        try {
            // Note: --no-sandbox is required in this env.
            // Could also launch chrome and reuse the instance
            // using puppeteer.connect()
            res.locals.browser = await puppeteer.launch({
                args: ['--no-sandbox']
            });
        } catch (err) {
            // A rejection inside an async handler is not guaranteed to reach
            // Express's error handling (Express 4 ignores it), which would
            // leave the request unanswered. Forward the failure explicitly.
            return next(err);
        }
        next(); // pass control to next route.
    });
    
    // POST /create: fetches http://picocms.org/ with puppeteer, stores the
    // JSON-stringified page HTML as a new document in the `request`
    // collection, and replies with the new document id. Any failure is
    // answered with a 500 and the error text.
    app.post('/create', async (req, res) => {
        const request = {
            data: req.body.data,
            created: new Date().toISOString()
        };
        // Launched by the app.all('*') middleware for this request.
        const browser = res.locals.browser;
        try {
            try {
                const page = await browser.newPage();
                await page.goto('http://picocms.org/', {
                    waitUntil: 'networkidle2'
                });
                const content = await page.content();
                request.data = JSON.stringify(content);
            } finally {
                // Close Chrome even when navigation fails, so a failed
                // request does not leak a browser process.
                await browser.close();
            }
            const doc = await db.collection('request').add(request);
            return res.json({
                message: `[success] ${doc.id} Generated`
            });
        } catch (e) {
            // Covers puppeteer errors as well as Firestore errors; before,
            // a puppeteer failure left the request without a response.
            res.status(500).json({
                error: `[failed] No Request Was Saved _> ${e}`
            });
        }
    });
    
    // Expose the Express app as the HTTPS-triggered function `api`.
    exports.api = functions.https.onRequest(app);
    
    

    【讨论】:

      猜你喜欢
      • 2019-06-16
      • 1970-01-01
      • 1970-01-01
      • 2020-06-17
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多