使用 Puppeteer 无法打开图片 URL

Image URL Is Not Opening with Puppeteer

我需要在一个网络抓取项目中打开图片 URL 并对图片截图。我的脚本在打开初始页面 URL 时工作正常,但在获取到所需的图片 URL 之后,却无法打开该图片 URL。

这是主脚本中的函数:

/**
 * Opens a product page, reads the `src` of the image matched by `selector`,
 * navigates to that image URL and saves a screenshot of the image to
 * `./image-test/<partName>.png`.
 *
 * @param {string} pageURL - URL of the page that contains the image element.
 * @param {string} partName - Used as the screenshot file name (without extension).
 * @param {{host: string}} urlHostName - Only `host` is read, for log messages.
 * @param {string} selector - CSS selector of the image element on the page.
 * @returns {Promise<void>} Resolves once the screenshot is written and the
 *   browser is closed; rejects with the underlying error on any failure.
 */
async function ImageFetcher(pageURL, partName, urlHostName, selector) {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: false,
    });
    const page = await browser.newPage();
    await page.goto(pageURL);

    // Read the raw attribute inside the page; return null instead of throwing
    // in the page context so the failure is reported with a clear message here.
    const rawSrc = await page.evaluate((sel) => {
      const node = document.querySelector(sel);
      return node ? node.getAttribute('src') : null;
    }, selector);
    if (!rawSrc) {
      throw new Error(`No src attribute found for selector "${selector}"`);
    }

    // Protocol-relative sources ("//host/path") are forced to https, as before.
    // Anything else is resolved against the current page URL, so sources that
    // are already absolute stay intact and relative paths become navigable.
    const imageHref = rawSrc.startsWith('//')
      ? `https:${rawSrc}`
      : new URL(rawSrc, page.url()).href;
    console.log(imageHref);

    // Reuse the same page for the second navigation: it must still be open.
    await page.goto(imageHref);
    const image = await page.waitForSelector('body > img');
    await image.screenshot({ path: `./image-test/${partName}.png` });
  } catch (e) {
    console.log(`Error ${urlHostName.host}! Part Name: ${partName}`);
    console.error(e);
    // Propagate the failure so the returned promise settles instead of hanging.
    throw e;
  } finally {
    // Closing the browser also closes its pages; do it on success and failure.
    if (browser) {
      await browser.close();
    }
  }

  console.log(`${urlHostName.host} Image Captured`);
}

var index = 0; // NOTE(review): shadowed by the loop variable in start(); kept in case other code reads it
var array = json.Part;

/**
 * Runs `urlSorter` sequentially over the entries of `array`.
 * A failure for one entry is logged and does not stop the loop.
 *
 * @returns {Promise<void>}
 */
async function start() {
  // NOTE(review): the bound of 1 means only the first entry is processed —
  // presumably a debugging limit; confirm before switching to array.length.
  for (let index = 0; index < 1; index++) {
    const element = array[index];
    try {
      await urlSorter(element);
    } catch (e) {
      // Only names that are in scope here may be used: the previous message
      // referenced identifiers not declared in this scope, which made the
      // handler itself throw and hid the original error.
      console.log(`URL Sorter Error, Part: ${JSON.stringify(element)}`);
      console.error(e);
    }
  }
}

// Attach a handler so an unexpected failure is reported rather than surfacing
// as an unhandled promise rejection.
start().catch((e) => {
  console.error(e);
});

下面是针对这个问题的一个更独立的最小测试(同样不起作用,打开的页面是空白的):

const puppeteer = require('puppeteer');

// Direct image URL used by this isolated reproduction. Declared with `const`
// so it is not an implicit global (which throws in strict mode / ES modules).
const pageURL = "https://static.grainger.com/rp/s/is/image/Grainger/1RVB9_AS01?hei=536&wid=536";


/**
 * Launches a browser, opens `pageURL` in a new page, then closes the page
 * and the browser again.
 *
 * @param {string} pageURL - URL to navigate to.
 * @returns {Promise<void>} Resolves after the browser has been closed;
 *   rejects with the underlying error if launching or navigating fails, so
 *   the actual failure reason is visible to the caller.
 */
async function ImageFinder(pageURL) {
  const browser = await puppeteer.launch({
    headless: false,
  });
  try {
    const page = await browser.newPage();
    await page.goto(pageURL);
    await page.close();
  } finally {
    // Always release the browser, including when navigation fails.
    await browser.close();
  }
}

  // Entry point of the reproduction. Handle a rejection explicitly so a
  // failure is reported and reflected in the exit code rather than surfacing
  // as an unhandled promise rejection.
  ImageFinder(pageURL).catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });

This is the result of both versions of the above code

However, if I manually paste the URL into the browser, the image displays.

您的 URL 缺少协议部分。手动粘贴到浏览器地址栏时,浏览器会自动补上协议;但在使用 Puppeteer 时,必须在 URL 中显式指定协议。

所以尝试:

// Image URL written with an explicit "http://" scheme.
pageURL = "http://static.grainger.com/rp/s/is/image/Grainger/1RVB9_AS01?hei=536&wid=536";