【问题标题】:Cloud Functions for Firebase - Converting PDF to image(Cloud Functions for Firebase - 将 PDF 转换为图像)
【发布时间】:2017-08-31 18:21:38
【问题描述】:

Cloud Functions for Firebase 有这个很好的示例,他们为每个上传的图像创建一个缩略图。这是通过使用 ImageMagick 完成的。

我尝试转换示例以将 PDF 转换为图像。这是 ImageMagick 可以做的事情,但我无法使其与 Cloud Functions for Firebase 一起使用。我不断收到代码 1 错误:

ChildProcessError: `convert /tmp/cd9d0278-16b2-42be-aa3d-45b5adf89332.pdf[0] -density 200 /tmp/cd9d0278-16b2-42be-aa3d-45b5adf89332.pdf` failed with code 1
    at ChildProcess.<anonymous> (/user_code/node_modules/child-process-promise/lib/index.js:132:23)
    at emitTwo (events.js:106:13)
    at ChildProcess.emit (events.js:191:7)
    at maybeClose (internal/child_process.js:877:16)
    at Socket.<anonymous> (internal/child_process.js:334:11)
    at emitOne (events.js:96:13)
    at Socket.emit (events.js:188:7)
    at Pipe._handle.close [as _onclose] (net.js:498:12)

当然,一种可能性是根本不支持转换 PDF。

const functions = require('firebase-functions');
const gcs = require('@google-cloud/storage')();
const spawn = require('child-process-promise').spawn;
// [END import]

// [START generateThumbnail]
/**
 * When an image is uploaded in the Storage bucket We generate a thumbnail automatically using
 * ImageMagick.
 */
// [START generateThumbnailTrigger]
exports.generateThumbnail = functions.storage.object().onChange(event => {
// [END generateThumbnailTrigger]
    // [START eventAttributes]
    const object = event.data; // The Storage object.

    const fileBucket = object.bucket; // The Storage bucket that contains the file.
    const filePath = object.name; // File path in the bucket.
    const contentType = object.contentType; // File content type.
    const resourceState = object.resourceState; // The resourceState is 'exists' or 'not_exists' (for file/folder deletions).
    // [END eventAttributes]

    // [START stopConditions]
    // Exit if this is triggered on a file that is not an image.
    if (!contentType.startsWith('application/pdf')) {
        console.log('This is not a pdf.');
        return;
    }

    // Get the file name.
    const fileName = filePath.split('/').pop();
    // Exit if the image is already a thumbnail.
    if (fileName.startsWith('thumb_')) {
        console.log('Already a Thumbnail.');
        return;
    }

    // Exit if this is a move or deletion event.
    if (resourceState === 'not_exists') {
        console.log('This is a deletion event.');
        return;
    }
    // [END stopConditions]

    // [START thumbnailGeneration]
    // Download file from bucket.
    const bucket = gcs.bucket(fileBucket);
    const tempFilePath = `/tmp/${fileName}`;
    return bucket.file(filePath).download({
        destination: tempFilePath
    }).then(() => {
        console.log('Pdf downloaded locally to', tempFilePath);
        // Generate a thumbnail of the first page using ImageMagick.
        return spawn('convert', [tempFilePath+'[0]' ,'-density', '200', tempFilePath]).then(() => {
            console.log('Thumbnail created at', tempFilePath);
            // Convert pdf extension to png
            const thumbFilePath = filePath.replace('.pdf', 'png');
            // Uploading the thumbnail.
            return bucket.upload(tempFilePath, {
                destination: thumbFilePath
            });
        });
    });
    // [END thumbnailGeneration]
});

【问题讨论】:

  • 看来转换pdf文件还需要ghostscript包,Google Cloud Functions默认没有安装这个包。
  • 还有其他方法吗?可惜这个不可用...
  • 还没有找到。至少在 Cloud Functions for Firebase 上还没有办法。
  • 您可以使用 PDF.js (github.com/mozilla/pdf.js) 库来生成缩略图
  • PDF.js 有一些错误会阻止在服务器上呈现某些 pdf 的文本(自 2014 年以来就存在):github.com/mozilla/pdf.js/issues/4244

标签: node.js pdf firebase imagemagick google-cloud-functions


【解决方案1】:

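Node 模块可以附带本机代码,并把它安装在与 Cloud Function 源代码相同的目录中。我在 GitHub 上找到了为 ghostscript(一个非常有用的 PDF 处理库)这样做的 Node 库:node-gs(供 JavaScript 调用 Ghostscript 的 Node 封装)和 lambda-ghostscript(已编译好的 Ghostscript 可执行文件)。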

我将 lambda-ghostscript 放入我的 functions 目录的子目录中,然后将 node-gs 作为依赖项添加到我的包文件中,如下所示:

{
  "name": "functions",
  "dependencies": {
    "@google-cloud/storage": "^1.3.1",
    "child-process-promise": "^2.2.1",
    "firebase-admin": "~5.4.0",
    "firebase-functions": "^0.7.2",
    "gs": "https://github.com/sina-masnadi/node-gs/tarball/master"
  }
}

然后在我的 index.js 文件中,只需 require 这个 Node 库,就可以在 JavaScript 中轻松使用 ghostscript。以下是使用 Google Cloud Storage 触发器的 Cloud Function 的完整代码:

const functions = require('firebase-functions');
const gcs = require('@google-cloud/storage')();
const spawn = require('child-process-promise').spawn;
const path = require('path');
const os = require('os');
const fs = require('fs');
var   gs = require('gs');

exports.makePNG = functions.storage.object().onChange(event => {

  // ignore delete events
  if (event.data.resourceState == 'not_exists') return false;

  const filePath = event.data.name;
  const fileDir = path.dirname(filePath);
  const fileName = path.basename(filePath);
  const tempFilePath = path.join(os.tmpdir(), fileName);
  if (fileName.endsWith('.png')) return false;
  if (!fileName.endsWith('.pdf')) return false;

  const newName = path.basename(filePath, '.pdf') + '.png';
  const tempNewPath = path.join(os.tmpdir(), newName);


  // // Download file from bucket.
  const bucket = gcs.bucket(event.data.bucket);

  return bucket.file(filePath).download({
    destination: tempFilePath
  }).then(() => {
    console.log('Image downloaded locally to', tempFilePath);

    return new Promise(function (resolve, reject) {
        gs()
          .batch()
          .nopause()
          .option('-r' + 50 * 2)
          .option('-dDownScaleFactor=2')
          .executablePath('lambda-ghostscript/bin/./gs')
          .device('png16m')
          .output(tempNewPath)
          .input(tempFilePath)
          .exec(function (err, stdout, stderr) {
              if (!err) {
                console.log('gs executed w/o error');            
                console.log('stdout',stdout);            
                console.log('stderr',stderr);            
                resolve();
              } else {
                console.log('gs error:', err);
                reject(err);
              }
          });
    });

  }).then(() => {
    console.log('PNG created at', tempNewPath);

    // Uploading the thumbnail.
    return bucket.upload(tempNewPath, {destination: newName});
  // Once the thumbnail has been uploaded delete the local file to free up disk space.
  }).then(() => {
    fs.unlinkSync(tempNewPath);
    fs.unlinkSync(tempFilePath);
  }).catch((err) => {
    console.log('exception:', err);
    return err;
  });

});

这是github上的项目:https://github.com/ultrasaurus/ghostscript-cloud-function

免责声明:这是使用已编译的本机代码,我通过实验验证了它适用于这种情况,所以它可能没问题。我没有研究具体的编译选项并验证它们是否完全适合 Cloud Functions 环境。

【讨论】:

  • 你能给我一个 ELI5 关于你的第一段的意思吗?
  • 你能给我一个 ELI5 关于你的第一段的意思吗?
  • NodeJS 适用于 JavaScript 应用程序。 JavaScript 是一种解释性语言,因此在我的 mac 上运行的相同代码将在 linux 服务器(或任何地方)上运行。当您安装“包”(npm install)时,它可以包含来自其他语言(如 C 或 C++)的源代码,这些源代码(通常)在本地编译为“对象”代码。如果我编译代码以在我的 mac 上运行,然后将其复制到 linux 服务器,那么它将无法工作。我需要为目标操作系统和硬件编译它,如果我这样做,并放在与我的 Cloud Function 相同的目录中,那么我可以上传并且它可以工作!
  • 很好的解释!这个编译的 GS 是用什么语言编写的?
  • 我尝试在 Firebase Cloud Functions 中使用它,但它不起作用,我收到 spawn EACCES 错误。你能提供解决方案吗?
【解决方案2】:

工作解决方案

感谢@Ultrasaurus 指出这种方法!但是,对我来说它不起作用,并且在您的 Github 回购中,您还声明了 I haven't tested them。我稍微修改了您的解决方案,得到了以下代码,它 100% 为我工作:

{
  "dependencies": {
    "@google-cloud/firestore": "^4.4.0",
    "@google-cloud/storage": "^5.3.0",
    "ghostscript": "https://github.com/musubu/node-ghostscript/tarball/master",
    "pdf-image": "^2.0.0",
    "rimraf": "^3.0.2",
    "uuid": "^8.3.1"
  }
}

该函数由 Firestore 事件触发:

const Storage = require('@google-cloud/storage')
const fs = require('fs')
const rimraf = require('rimraf')
const os = require('os')
const gs = require('ghostscript')

// Google Cloud project id and Storage bucket name used by this module.
// NOTE(review): the values look like placeholders to replace before deploying.
const GOOGLE_PROJECT_ID = 'MY_GOOGLE_PROJECT_ID'
const GOOGLE_STORAGE_BUCKET_NAME = 'MY_GOOGLE_STORAGE_BUCKET_NAME'

// Storage client created once at module load for the project above.
const storage = new Storage.Storage({
  projectId: GOOGLE_PROJECT_ID
})

exports.createImage = async (event) => {
  let {
    appointment,
    name
  } = event.value.fields

  name = getFileName(name.stringValue)
  appointment = appointment.stringValue

  console.log(`Processing document ${name} in appointment ${appointment}`)

  const tempDir = createTempDir(appointment)

  const tmpDocumentPath = await downloadPdf(tempDir, name, appointment)
  const imagePath = await convertPdfToImage(tmpDocumentPath)
  await uploadImage(imagePath, appointment)

  deleteDir(tempDir)
}

/**
 * Returns the part of `name` after its last '/' (the whole string when it
 * contains no '/').
 *
 * @param {string} name - Slash-separated path.
 * @returns {string} Last path segment; empty when `name` ends with '/'.
 */
function getFileName (name) {
  const lastSlash = name.lastIndexOf('/')
  // lastIndexOf yields -1 without a slash, so slice(0) keeps the full string.
  return name.slice(lastSlash + 1)
}

/**
 * Creates a fresh, uniquely named scratch directory for one appointment
 * inside the OS temp directory.
 *
 * `fs.mkdtempSync` appends random characters to the prefix and creates the
 * directory in one step, so two invocations for the same appointment cannot
 * end up sharing (or colliding on) a directory name.
 *
 * @param {string} appointment - Appointment id, used as directory name prefix.
 * @returns {string} Absolute path of the created directory.
 * @throws {Error} When the directory cannot be created.
 */
function createTempDir (appointment) {
  const tempDir = fs.mkdtempSync(`${os.tmpdir()}/${appointment}_`)
  console.log(`Created dir ${tempDir}`)
  return tempDir
}

/**
 * Downloads `<appointment>/<name>` from the configured bucket into `tempDir`.
 *
 * @param {string} tempDir - Local directory that receives the file.
 * @param {string} name - File name of the document.
 * @param {string} appointment - Appointment id; used as the remote folder.
 * @returns {Promise<string>} Local path of the downloaded file.
 */
async function downloadPdf (tempDir, name, appointment) {
  const localPath = `${tempDir}/${name}`
  const remotePath = `${appointment}/${name}`

  const sourceBucket = storage.bucket(GOOGLE_STORAGE_BUCKET_NAME)
  const remoteFile = sourceBucket.file(remotePath)
  await remoteFile.download({ destination: localPath })

  console.log(`Successfully downloaded document ${name}`)
  return localPath
}

/**
 * Renders a PDF to a PNG with Ghostscript (`png16m` device).
 *
 * The output path is the input path with a trailing `.pdf` extension
 * (case-insensitive) swapped for `.png`. Only the extension is touched, so a
 * "pdf" occurring elsewhere in the path (directory name, file name) is left
 * alone. When the input has no `.pdf` extension, `.png` is appended.
 *
 * @param {string} pdfPath - Local path of the PDF to convert.
 * @returns {Promise<string>} Path of the generated PNG.
 * @throws Rejects with the Ghostscript error, or with any error thrown while
 *   building or launching the command.
 */
async function convertPdfToImage (pdfPath) {
  const imagePath = `${pdfPath.replace(/\.pdf$/i, '')}.png`

  return new Promise(function (resolve, reject) {
    // Deliberately no try/catch: an exception thrown synchronously inside the
    // executor rejects the promise. Catching and only logging it would leave
    // the promise pending forever.
    gs()
      .batch()
      .nopause()
      .device('png16m')
      .output(imagePath)
      .input(pdfPath)
      .exec(function (err, stdout, stderr) {
        if (err) {
          console.log('gs error:', err)
          reject(err)
          return
        }

        console.log('gs executed w/o error')
        console.log('stdout', stdout)
        console.log('stderr', stderr)
        resolve(imagePath)
      })
  })
}

/**
 * Uploads a local image to `<appointment>/<image file name>` in the
 * configured bucket and tags it with the appointment id as custom metadata.
 *
 * @param {string} imagePath - Local path of the image to upload.
 * @param {string} appointment - Appointment id; used as the remote folder.
 * @returns {Promise<void>}
 */
async function uploadImage (imagePath, appointment) {
  // Last path segment = file name of the image.
  const imageName = imagePath.split('/').pop()
  const uploadOptions = {
    destination: `${appointment}/${imageName}`,
    metadata: {
      metadata: { appointment }
    }
  }

  console.log(`Starting upload for ${imageName} at ${imagePath} to storage ${appointment}/${imageName}`)

  const targetBucket = storage.bucket(GOOGLE_STORAGE_BUCKET_NAME)
  await targetBucket.upload(imagePath, uploadOptions)

  console.log(`Successfully uploaded image for appointment ${appointment}`)
}

/**
 * Deletes `dir` synchronously by delegating to `rimraf.sync`.
 *
 * @param {string} dir - Path of the directory to remove.
 */
function deleteDir (dir) {
  rimraf.sync(dir)
}

【讨论】:

    猜你喜欢
    • 2017-09-27
    • 2017-09-07
    • 2018-01-03
    • 2022-12-01
    • 1970-01-01
    • 2018-04-19
    • 2017-08-26
    • 2017-08-08
    相关资源
    最近更新 更多