Node JS获取PDF缓冲区的第一页
Node JS get the first page of PDF buffer
我正在寻找一种在 NodeJS 中获取 PDF 第一页的简单方法。输入的 PDF 以缓冲区 (Uint8Array) 的形式提供,得到的第一页也应该是一个缓冲区。
经过大量研究和尝试不同的工具后,我终于找到了能够执行此操作的库 (HummusJS)。
但事实证明这个问题并不简单,其中还有一些需要注意的细节。
首先,库默认不支持从缓冲区读取。这是一个允许执行此操作的适配器:
/*
PDFRStreamForBuffer is an implementation of a read stream using a supplied array
@author Luciano Júnior
*/
'use strict';
const EventEmitter = require('events');
/**
 * Read-stream adapter over an in-memory byte buffer.
 *
 * Exposes the positional read interface (read / notEnded / setPosition /
 * setPositionFromEnd / skip / getCurrentPosition) that is handed to
 * HummusJS so it can parse a PDF that lives in memory instead of on disk.
 */
class PDFRStreamForBuffer {
  /**
   * @param {Uint8Array} buffer - Bytes to expose as a stream. It is indexed
   *   byte by byte and its `byteLength` is taken as the stream size.
   */
  constructor(buffer) {
    this.innerBuffer = buffer;
    this.rposition = 0;
    this.fileSize = buffer.byteLength;
  }

  /**
   * Reads up to `inAmount` bytes starting at the current position and
   * advances the position by the number of bytes actually returned.
   *
   * The read is clamped to the bytes that remain, so the result never
   * contains `undefined` entries and the position never runs past the end
   * of the buffer because of a read.
   *
   * @param {number} inAmount - Maximum number of bytes to read.
   * @returns {number[]} The bytes read; shorter than `inAmount` (possibly
   *   empty) when fewer bytes remain.
   */
  read(inAmount) {
    const start = this.rposition;
    // A position outside the buffer has nothing to read.
    if (start < 0 || start >= this.fileSize) {
      return [];
    }
    const count = Math.min(inAmount, this.fileSize - start);
    // Written as a negated comparison so that NaN is rejected as well.
    if (!(count > 0)) {
      return [];
    }
    const bytes = [];
    for (let i = 0; i < count; i++) {
      bytes.push(this.innerBuffer[start + i]);
    }
    this.rposition = start + count;
    return bytes;
  }

  /**
   * @returns {boolean} True while the current position is before the end.
   */
  notEnded() {
    return this.rposition < this.fileSize;
  }

  /**
   * Moves the read position to an absolute offset from the start.
   * @param {number} inPosition - Offset from the beginning of the buffer.
   */
  setPosition(inPosition) {
    this.rposition = inPosition;
  }

  /**
   * Moves the read position to an offset counted back from the end.
   * @param {number} inPosition - Distance from the end of the buffer.
   */
  setPositionFromEnd(inPosition) {
    this.rposition = this.fileSize - inPosition;
  }

  /**
   * Advances the read position without returning any data.
   * @param {number} inAmount - Number of bytes to skip.
   */
  skip(inAmount) {
    this.rposition += inAmount;
  }

  /**
   * @returns {number} The current read offset from the start of the buffer.
   */
  getCurrentPosition() {
    return this.rposition;
  }
}
module.exports = PDFRStreamForBuffer;
默认情况下,HummusJS 只是将输出写入文件。有一个内置适配器将其输出到可写流。所以最后我不得不使用 'memory-streams' 模块从流中获取结果缓冲区。
明确了这一点,并且有了 PDFRStreamForBuffer 之后,您就可以运行以下代码段:
'use strict';
const hummus = require('hummus');
const fs = require('fs');
const streams = require('memory-streams');
const PDFRStreamForBuffer = require('./pdfr-stream-for-buffer.js');
const path = require('path');
/**
 * Produces a new PDF that contains only the first page of the given PDF.
 *
 * @param {Uint8Array} buffer - Bytes of the source PDF.
 * @returns {Buffer} Bytes of the generated single-page PDF, as returned by
 *   the in-memory stream's `toBuffer()`.
 */
const getFirstPage = function (buffer) {
  // In-memory sink that collects everything hummus writes.
  const sink = new streams.WritableStream();

  // PDFStreamForResponse lets the hummus writer target a writable stream.
  const writer = hummus.createWriter(new hummus.PDFStreamForResponse(sink));

  // The custom adapter lets hummus read the source PDF from memory.
  const source = new PDFRStreamForBuffer(buffer);

  // Page indexes are zero-based, so 0 copies the first page.
  writer.createPDFCopyingContext(source).appendPDFPageFromPDF(0);

  // Finalize the document, as the hummus docs/examples do.
  writer.end();

  // hummus is described as doing its work synchronously, so all output is
  // already in the sink here and it can be closed right away.
  sink.end();

  // The ended in-memory stream hands back its content as one buffer.
  return sink.toBuffer();
};
// Demo driver: load original.pdf from this script's directory
// (synchronous I/O is used only to keep the example short).
const inputPath = path.join(__dirname, '/original.pdf');
let pdfBuffer = fs.readFileSync(inputPath);

// Extract the first page as a standalone PDF buffer.
let firstPageBuffer = getFirstPage(pdfBuffer);

// Write the result next to the script so it can be inspected by hand.
const outputPath = path.join(__dirname, '/result.pdf');
fs.writeFileSync(outputPath, firstPageBuffer);
为了编写此代码段,我进行了科学研究。所以希望对你有帮助。 :)
我正在寻找一种在 NodeJS 中获取 PDF 第一页的简单方法。输入的 PDF 以缓冲区 (Uint8Array) 的形式提供,得到的第一页也应该是一个缓冲区。
经过大量研究和尝试不同的工具后,我终于找到了能够执行此操作的库 (HummusJS)。但事实证明这个问题并不简单,其中还有一些需要注意的细节。
首先,库默认不支持从缓冲区读取。这是一个允许执行此操作的适配器:
/*
PDFRStreamForBuffer is an implementation of a read stream using a supplied array
@author Luciano Júnior
*/
'use strict';
const EventEmitter = require('events');
/**
 * Read-stream adapter over an in-memory byte buffer.
 *
 * Exposes the positional read interface (read / notEnded / setPosition /
 * setPositionFromEnd / skip / getCurrentPosition) that is handed to
 * HummusJS so it can parse a PDF that lives in memory instead of on disk.
 */
class PDFRStreamForBuffer {
  /**
   * @param {Uint8Array} buffer - Bytes to expose as a stream. It is indexed
   *   byte by byte and its `byteLength` is taken as the stream size.
   */
  constructor(buffer) {
    this.innerBuffer = buffer;
    this.rposition = 0;
    this.fileSize = buffer.byteLength;
  }

  /**
   * Reads up to `inAmount` bytes starting at the current position and
   * advances the position by the number of bytes actually returned.
   *
   * The read is clamped to the bytes that remain, so the result never
   * contains `undefined` entries and the position never runs past the end
   * of the buffer because of a read.
   *
   * @param {number} inAmount - Maximum number of bytes to read.
   * @returns {number[]} The bytes read; shorter than `inAmount` (possibly
   *   empty) when fewer bytes remain.
   */
  read(inAmount) {
    const start = this.rposition;
    // A position outside the buffer has nothing to read.
    if (start < 0 || start >= this.fileSize) {
      return [];
    }
    const count = Math.min(inAmount, this.fileSize - start);
    // Written as a negated comparison so that NaN is rejected as well.
    if (!(count > 0)) {
      return [];
    }
    const bytes = [];
    for (let i = 0; i < count; i++) {
      bytes.push(this.innerBuffer[start + i]);
    }
    this.rposition = start + count;
    return bytes;
  }

  /**
   * @returns {boolean} True while the current position is before the end.
   */
  notEnded() {
    return this.rposition < this.fileSize;
  }

  /**
   * Moves the read position to an absolute offset from the start.
   * @param {number} inPosition - Offset from the beginning of the buffer.
   */
  setPosition(inPosition) {
    this.rposition = inPosition;
  }

  /**
   * Moves the read position to an offset counted back from the end.
   * @param {number} inPosition - Distance from the end of the buffer.
   */
  setPositionFromEnd(inPosition) {
    this.rposition = this.fileSize - inPosition;
  }

  /**
   * Advances the read position without returning any data.
   * @param {number} inAmount - Number of bytes to skip.
   */
  skip(inAmount) {
    this.rposition += inAmount;
  }

  /**
   * @returns {number} The current read offset from the start of the buffer.
   */
  getCurrentPosition() {
    return this.rposition;
  }
}
module.exports = PDFRStreamForBuffer;
默认情况下,HummusJS 只是将输出写入文件。有一个内置适配器将其输出到可写流。所以最后我不得不使用 'memory-streams' 模块从流中获取结果缓冲区。
明确了这一点,并且有了 PDFRStreamForBuffer 之后,您就可以运行以下代码段:
'use strict';
const hummus = require('hummus');
const fs = require('fs');
const streams = require('memory-streams');
const PDFRStreamForBuffer = require('./pdfr-stream-for-buffer.js');
const path = require('path');
/**
 * Produces a new PDF that contains only the first page of the given PDF.
 *
 * @param {Uint8Array} buffer - Bytes of the source PDF.
 * @returns {Buffer} Bytes of the generated single-page PDF, as returned by
 *   the in-memory stream's `toBuffer()`.
 */
const getFirstPage = function (buffer) {
  // In-memory sink that collects everything hummus writes.
  const sink = new streams.WritableStream();

  // PDFStreamForResponse lets the hummus writer target a writable stream.
  const writer = hummus.createWriter(new hummus.PDFStreamForResponse(sink));

  // The custom adapter lets hummus read the source PDF from memory.
  const source = new PDFRStreamForBuffer(buffer);

  // Page indexes are zero-based, so 0 copies the first page.
  writer.createPDFCopyingContext(source).appendPDFPageFromPDF(0);

  // Finalize the document, as the hummus docs/examples do.
  writer.end();

  // hummus is described as doing its work synchronously, so all output is
  // already in the sink here and it can be closed right away.
  sink.end();

  // The ended in-memory stream hands back its content as one buffer.
  return sink.toBuffer();
};
// Demo driver: load original.pdf from this script's directory
// (synchronous I/O is used only to keep the example short).
const inputPath = path.join(__dirname, '/original.pdf');
let pdfBuffer = fs.readFileSync(inputPath);

// Extract the first page as a standalone PDF buffer.
let firstPageBuffer = getFirstPage(pdfBuffer);

// Write the result next to the script so it can be inspected by hand.
const outputPath = path.join(__dirname, '/result.pdf');
fs.writeFileSync(outputPath, firstPageBuffer);
为了编写此代码段,我进行了科学研究。所以希望对你有帮助。 :)