使用 Puppeteer 无法打开图片 URL
Image URL Is Not Opening with Puppeteer
我需要能够为网络抓取项目打开和截取图像 url。我的脚本在给定初始页面 URL 时工作正常,但是在检索到所需图像 URL 后,它不起作用。
这是主脚本中的函数:
/**
 * Opens `pageURL`, reads the `src` attribute of the element matched by
 * `selector`, navigates to that image URL and saves a screenshot of the
 * rendered image to `./image-test/${partName}.png`.
 *
 * Any failure is logged to the console (with the underlying error) and the
 * returned promise still resolves, so a caller awaiting it is never left
 * hanging. The browser is closed on both the success and the failure path.
 *
 * @param {string} pageURL - Page that contains the image element.
 * @param {string} partName - Used as the screenshot file name and in log output.
 * @param {{host: string}} urlHostName - Object whose `host` is printed in log messages.
 * @param {string} selector - CSS selector of the image element on `pageURL`.
 * @returns {Promise<void>} Resolves once the capture attempt has finished.
 */
async function ImageFetcher(pageURL, partName, urlHostName, selector) {
  try {
    const browser = await puppeteer.launch({
      headless: false,
    });
    try {
      const page = await browser.newPage();
      await page.goto(pageURL);

      // Only read the raw attribute inside the page; normalise it out here.
      const rawSrc = await page.evaluate((sel) => {
        const node = document.querySelector(sel);
        return node ? node.getAttribute('src') : null;
      }, selector);
      if (!rawSrc) {
        throw new Error(`No src attribute found for selector "${selector}"`);
      }

      // Protocol-relative sources ("//host/path") are forced to https.
      // Anything else is resolved against the current page URL, so absolute
      // URLs pass through untouched and relative paths become absolute.
      const imageHref = rawSrc.startsWith('//')
        ? `https:${rawSrc}`
        : new URL(rawSrc, page.url()).href;
      console.log(imageHref);

      // The page must stay open here: closing it before this second
      // navigation makes goto() fail on a closed target.
      await page.goto(imageHref);
      await page.waitForSelector("body > img");
      const image = await page.$("body > img");
      await image.screenshot({path: `./image-test/${partName}.png`});
    } finally {
      // Closing the browser also disposes of every page it owns.
      await browser.close();
    }
    console.log(`${urlHostName.host} Image Captured`);
  } catch (e) {
    console.log(`Error ${urlHostName.host}! Part Name: ${partName}`, e);
  }
}
// Module-level counter; the loop inside start() declares its own counter,
// so this one is not read anywhere in this block.
var index = 0;
// Work list taken from the `Part` property of `json`.
var array = json.Part;

/**
 * Passes entries of `array` to urlSorter one after another, awaiting each
 * call before moving on. The loop bound is 1, so only array[0] is visited.
 */
async function start() {
  for (let i = 0; i < 1; i++) {
    const part = array[i];
    try {
      await urlSorter(part);
    } catch (e) {
      // NOTE(review): partName and urlHostName are not declared in this
      // block — confirm they exist in an enclosing scope, otherwise this
      // log line itself throws a ReferenceError.
      console.log(`URL Sorter Error, Part Name: ${partName} ${urlHostName.host}`);
    }
  }
}

start();
这里有一个关于这个问题的更孤立的测试(这也不起作用,打开的页面是空白的)
const puppeteer = require('puppeteer');
// Image URL used by this isolated reproduction.
const pageURL = "https://static.grainger.com/rp/s/is/image/Grainger/1RVB9_AS01?hei=536&wid=536";

/**
 * Launches a visible browser, navigates a new page to `pageURL`, then closes
 * the page and the browser.
 *
 * Any failure is logged as `ERR` together with the underlying error, and the
 * returned promise still resolves. The browser is closed even when the
 * navigation throws.
 *
 * @param {string} pageURL - URL to open.
 * @returns {Promise<void>} Resolves once the attempt has finished.
 */
async function ImageFinder(pageURL) {
  try {
    const browser = await puppeteer.launch({
      headless: false,
    });
    try {
      const page = await browser.newPage();
      await page.goto(pageURL);
      await page.close();
    } finally {
      // Runs on success and on failure so no browser process is left behind.
      await browser.close();
    }
  } catch (e) {
    console.log(`ERR`, e);
  }
}

// Fire-and-forget: ImageFinder logs its own failures instead of rejecting.
ImageFinder(pageURL);
This is the result of both versions of the above code
However, if I manually paste the URL into the browser, the image displays.
您的 URL 缺少协议部分。手动粘贴到浏览器地址栏时,浏览器会自动补全协议,但在 Puppeteer 中必须明确指定。
所以尝试:
pageURL = "http://static.grainger.com/rp/s/is/image/Grainger/1RVB9_AS01?hei=536&wid=536";
我需要能够为网络抓取项目打开和截取图像 url。我的脚本在给定初始页面 URL 时工作正常,但是在检索到所需图像 URL 后,它不起作用。
这是主脚本中的函数:
/**
 * Opens `pageURL`, reads the `src` attribute of the element matched by
 * `selector`, navigates to that image URL and saves a screenshot of the
 * rendered image to `./image-test/${partName}.png`.
 *
 * Any failure is logged to the console (with the underlying error) and the
 * returned promise still resolves, so a caller awaiting it is never left
 * hanging. The browser is closed on both the success and the failure path.
 *
 * @param {string} pageURL - Page that contains the image element.
 * @param {string} partName - Used as the screenshot file name and in log output.
 * @param {{host: string}} urlHostName - Object whose `host` is printed in log messages.
 * @param {string} selector - CSS selector of the image element on `pageURL`.
 * @returns {Promise<void>} Resolves once the capture attempt has finished.
 */
async function ImageFetcher(pageURL, partName, urlHostName, selector) {
  try {
    const browser = await puppeteer.launch({
      headless: false,
    });
    try {
      const page = await browser.newPage();
      await page.goto(pageURL);

      // Only read the raw attribute inside the page; normalise it out here.
      const rawSrc = await page.evaluate((sel) => {
        const node = document.querySelector(sel);
        return node ? node.getAttribute('src') : null;
      }, selector);
      if (!rawSrc) {
        throw new Error(`No src attribute found for selector "${selector}"`);
      }

      // Protocol-relative sources ("//host/path") are forced to https.
      // Anything else is resolved against the current page URL, so absolute
      // URLs pass through untouched and relative paths become absolute.
      const imageHref = rawSrc.startsWith('//')
        ? `https:${rawSrc}`
        : new URL(rawSrc, page.url()).href;
      console.log(imageHref);

      // The page must stay open here: closing it before this second
      // navigation makes goto() fail on a closed target.
      await page.goto(imageHref);
      await page.waitForSelector("body > img");
      const image = await page.$("body > img");
      await image.screenshot({path: `./image-test/${partName}.png`});
    } finally {
      // Closing the browser also disposes of every page it owns.
      await browser.close();
    }
    console.log(`${urlHostName.host} Image Captured`);
  } catch (e) {
    console.log(`Error ${urlHostName.host}! Part Name: ${partName}`, e);
  }
}
// Module-level counter; the loop inside start() declares its own counter,
// so this one is not read anywhere in this block.
var index = 0;
// Work list taken from the `Part` property of `json`.
var array = json.Part;

/**
 * Passes entries of `array` to urlSorter one after another, awaiting each
 * call before moving on. The loop bound is 1, so only array[0] is visited.
 */
async function start() {
  for (let i = 0; i < 1; i++) {
    const part = array[i];
    try {
      await urlSorter(part);
    } catch (e) {
      // NOTE(review): partName and urlHostName are not declared in this
      // block — confirm they exist in an enclosing scope, otherwise this
      // log line itself throws a ReferenceError.
      console.log(`URL Sorter Error, Part Name: ${partName} ${urlHostName.host}`);
    }
  }
}

start();
这里有一个关于这个问题的更孤立的测试(这也不起作用,打开的页面是空白的)
const puppeteer = require('puppeteer');
// Image URL used by this isolated reproduction.
const pageURL = "https://static.grainger.com/rp/s/is/image/Grainger/1RVB9_AS01?hei=536&wid=536";

/**
 * Launches a visible browser, navigates a new page to `pageURL`, then closes
 * the page and the browser.
 *
 * Any failure is logged as `ERR` together with the underlying error, and the
 * returned promise still resolves. The browser is closed even when the
 * navigation throws.
 *
 * @param {string} pageURL - URL to open.
 * @returns {Promise<void>} Resolves once the attempt has finished.
 */
async function ImageFinder(pageURL) {
  try {
    const browser = await puppeteer.launch({
      headless: false,
    });
    try {
      const page = await browser.newPage();
      await page.goto(pageURL);
      await page.close();
    } finally {
      // Runs on success and on failure so no browser process is left behind.
      await browser.close();
    }
  } catch (e) {
    console.log(`ERR`, e);
  }
}

// Fire-and-forget: ImageFinder logs its own failures instead of rejecting.
ImageFinder(pageURL);
This is the result of both versions of the above code
However, if I manually paste the URL into the browser, the image displays.
您的 URL 缺少协议部分。手动粘贴到浏览器地址栏时,浏览器会自动补全协议,但在 Puppeteer 中必须明确指定。
所以尝试:
pageURL = "http://static.grainger.com/rp/s/is/image/Grainger/1RVB9_AS01?hei=536&wid=536";