使用 puppeteer 库和 fetch 从 URL 获取 PDF blob 并直接上传到 Google Drive
Getting a PDF blob from a URL and inserting it directly into Drive, using the puppeteer library and fetch
我正在尝试使用 puppeteer 登录一个网站,然后把 PDF "下载"下来并直接上传到我的 Google Drive。我已经能够通过 puppeteer 打开 PDF 页面,并且(除了其他尝试之外)还试过用 fetch 加 cookies 获取 blob 再发送到 Drive。我无法在此处公开登录信息,但如果您能帮我找出代码中的一个(或多个)错误,那就太好了!目前,程序会进入 PDF 页面之前的那个页面,取得链接,带着 cookie 执行 fetch,并在 Drive 中创建一个 PDF 文件,但该 PDF 是损坏的(0 KB)。
我还尝试过 puppeteer 的 setRequestInterception 和 getPdf,以及结合缓冲区(buffer)使用我在查资料时找到的一些方法。
// Download the PDF behind `urlPdf` with the session of the logged-in
// puppeteer page and upload it to a Drive folder.
// `urlPdf` is the link collected on the page that precedes the PDF page.
const cookies = await page.cookies();
const opts = {
  headers: {
    // page.cookies() returns an array of cookie objects, but the Cookie
    // header must be a single "name=value; name=value" string. Passing the
    // array itself sends a meaningless header, so the request is not
    // authenticated.
    cookie: cookies.map(({ name, value }) => `${name}=${value}`).join('; '),
  },
};
const pdfResponse = await fetch(urlPdf, opts);
if (!pdfResponse.ok) {
  // Fail loudly instead of uploading an error page or an empty body.
  throw new Error(`Failed to download the PDF: HTTP ${pdfResponse.status}`);
}
const blob = await pdfResponse.blob();
console.log("pegou o blob");

// upload file in specific folder
let file;
console.log("driveApi upload reached");

/**
 * Turns the blob carried in a request body into a File-like object by adding
 * the two properties a Blob lacks compared to a File.
 *
 * Not used by the upload below; kept for callers that may rely on it.
 *
 * @param {{ body: { blob: object } }} req - request whose body holds the blob
 * @returns {object} the same blob, now carrying `lastModifiedDate` and `name`
 */
function blobToFile(req) {
  file = req.body.blob;
  file.lastModifiedDate = new Date();
  // The name must be a string literal; the bare `teste.pdf` was parsed as a
  // property access on an undefined variable and threw a ReferenceError.
  file.name = 'teste.pdf'; // req.body.word;
  return file;
}

// The Drive client uploads from a readable stream, so the downloaded bytes are
// wrapped in one. The original passed `file`, which was still undefined at
// this point (blobToFile was never called) and produced a 0 KB upload.
const { Readable } = require('stream');
const pdfBuffer = Buffer.from(await blob.arrayBuffer());

const folderId = myFolderId;
const fileMetadata = {
  name: 'teste.pdf',
  parents: [folderId],
};
const media = {
  mimeType: 'application/pdf',
  body: Readable.from(pdfBuffer),
};
drive.files.create(
  {
    auth: jwToken,
    resource: fileMetadata,
    media,
    fields: 'id',
  },
  (err, res) => {
    if (err) {
      // Handle error
      console.error(err);
    } else {
      console.log('File Id: ', res.data.id);
    }
  },
);
我尝试了很多方法,但我得出的最终解决方案发布在这里:
// Intercept every request so that the one loading the PDF can be replayed
// outside the browser and its body captured as a Buffer.
await page.setRequestInterception(true);
page.on('request', async (request) => {
  // This condition is true only on the PDF page (in my case, of course).
  if (!request.url().includes('exibirFat.do')) {
    request.continue();
    return;
  }

  try {
    /* add the cookies */
    const cookies = await page.cookies();
    const options = {
      encoding: null, // resolve with the raw body (a Buffer) instead of a string
      // Use the public accessors; the underscore-prefixed fields are private
      // puppeteer internals and may change between versions.
      method: request.method(),
      uri: request.url(),
      body: request.postData(),
      // Copy the headers so the intercepted request's own object is not mutated.
      headers: {
        ...request.headers(),
        Cookie: cookies.map((ck) => `${ck.name}=${ck.value}`).join(';'),
      },
    };

    /* resend the request */
    const response = await request_client(options);
    // NOTE(review): `buffer` is not declared in this snippet; it must be
    // declared in the enclosing scope (e.g. `let buffer;`) or this line
    // creates an implicit global / throws in strict mode.
    buffer = response; // PDF Buffer
    const filename = 'file.pdf';
    fs.writeFileSync(filename, buffer); // Save file
  } catch (err) {
    console.error('Failed to download the PDF:', err);
  } finally {
    // An intercepted request stalls until it is continued, responded to or
    // aborted. The PDF has already been fetched separately, so the browser's
    // own copy of the request is dropped.
    request.abort().catch((err) => console.error(err));
  }
});
这个解决方案需要:const request_client = require('request-promise-native');(另外还需要 const fs = require('fs'); 用于保存文件)
我正在尝试使用 puppeteer 登录一个网站,然后把 PDF "下载"下来并直接上传到我的 Google Drive。我已经能够通过 puppeteer 打开 PDF 页面,并且(除了其他尝试之外)还试过用 fetch 加 cookies 获取 blob 再发送到 Drive。我无法在此处公开登录信息,但如果您能帮我找出代码中的一个(或多个)错误,那就太好了!目前,程序会进入 PDF 页面之前的那个页面,取得链接,带着 cookie 执行 fetch,并在 Drive 中创建一个 PDF 文件,但该 PDF 是损坏的(0 KB)。
我还尝试过 puppeteer 的 setRequestInterception 和 getPdf,以及结合缓冲区(buffer)使用我在查资料时找到的一些方法。
// Download the PDF behind `urlPdf` with the session of the logged-in
// puppeteer page and upload it to a Drive folder.
// `urlPdf` is the link collected on the page that precedes the PDF page.
const cookies = await page.cookies();
const opts = {
  headers: {
    // page.cookies() returns an array of cookie objects, but the Cookie
    // header must be a single "name=value; name=value" string. Passing the
    // array itself sends a meaningless header, so the request is not
    // authenticated.
    cookie: cookies.map(({ name, value }) => `${name}=${value}`).join('; '),
  },
};
const pdfResponse = await fetch(urlPdf, opts);
if (!pdfResponse.ok) {
  // Fail loudly instead of uploading an error page or an empty body.
  throw new Error(`Failed to download the PDF: HTTP ${pdfResponse.status}`);
}
const blob = await pdfResponse.blob();
console.log("pegou o blob");

// upload file in specific folder
let file;
console.log("driveApi upload reached");

/**
 * Turns the blob carried in a request body into a File-like object by adding
 * the two properties a Blob lacks compared to a File.
 *
 * Not used by the upload below; kept for callers that may rely on it.
 *
 * @param {{ body: { blob: object } }} req - request whose body holds the blob
 * @returns {object} the same blob, now carrying `lastModifiedDate` and `name`
 */
function blobToFile(req) {
  file = req.body.blob;
  file.lastModifiedDate = new Date();
  // The name must be a string literal; the bare `teste.pdf` was parsed as a
  // property access on an undefined variable and threw a ReferenceError.
  file.name = 'teste.pdf'; // req.body.word;
  return file;
}

// The Drive client uploads from a readable stream, so the downloaded bytes are
// wrapped in one. The original passed `file`, which was still undefined at
// this point (blobToFile was never called) and produced a 0 KB upload.
const { Readable } = require('stream');
const pdfBuffer = Buffer.from(await blob.arrayBuffer());

const folderId = myFolderId;
const fileMetadata = {
  name: 'teste.pdf',
  parents: [folderId],
};
const media = {
  mimeType: 'application/pdf',
  body: Readable.from(pdfBuffer),
};
drive.files.create(
  {
    auth: jwToken,
    resource: fileMetadata,
    media,
    fields: 'id',
  },
  (err, res) => {
    if (err) {
      // Handle error
      console.error(err);
    } else {
      console.log('File Id: ', res.data.id);
    }
  },
);
我尝试了很多方法,但我得出的最终解决方案发布在这里:
// Intercept every request so that the one loading the PDF can be replayed
// outside the browser and its body captured as a Buffer.
await page.setRequestInterception(true);
page.on('request', async (request) => {
  // This condition is true only on the PDF page (in my case, of course).
  if (!request.url().includes('exibirFat.do')) {
    request.continue();
    return;
  }

  try {
    /* add the cookies */
    const cookies = await page.cookies();
    const options = {
      encoding: null, // resolve with the raw body (a Buffer) instead of a string
      // Use the public accessors; the underscore-prefixed fields are private
      // puppeteer internals and may change between versions.
      method: request.method(),
      uri: request.url(),
      body: request.postData(),
      // Copy the headers so the intercepted request's own object is not mutated.
      headers: {
        ...request.headers(),
        Cookie: cookies.map((ck) => `${ck.name}=${ck.value}`).join(';'),
      },
    };

    /* resend the request */
    const response = await request_client(options);
    // NOTE(review): `buffer` is not declared in this snippet; it must be
    // declared in the enclosing scope (e.g. `let buffer;`) or this line
    // creates an implicit global / throws in strict mode.
    buffer = response; // PDF Buffer
    const filename = 'file.pdf';
    fs.writeFileSync(filename, buffer); // Save file
  } catch (err) {
    console.error('Failed to download the PDF:', err);
  } finally {
    // An intercepted request stalls until it is continued, responded to or
    // aborted. The PDF has already been fetched separately, so the browser's
    // own copy of the request is dropped.
    request.abort().catch((err) => console.error(err));
  }
});
这个解决方案需要:const request_client = require('request-promise-native');(另外还需要 const fs = require('fs'); 用于保存文件)