无法抓取 Facebook 市场中的所有广告

Can't scrape all ads in Facebook Marketplace

我正在尝试抓取 Facebook Marketplace,但只抓取到了第一条广告。有人能建议一种抓取整个广告列表的方法吗?

代码

// Opens the Marketplace search page in a visible browser, pauses, then collects
// the inner text of one nested element per matched container and prints the list.
(async () => {
    const targetUrl = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';

    const browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(targetUrl, { waitUntil: 'networkidle2' });
    // await autoScroll(page);
    console.log('scraping...');
    // Fixed pause before the DOM is read.
    await delay(4000);

    const data = await page.evaluate(() => {
        const containers = document.querySelectorAll('div [class = "fome6x0j tkqzz1yd aodizinl fjf4s8hc f7vcsfb0"]');
        // One entry per container; throws if a container has no matching child.
        return Array.from(containers).map(
            (container) => container.querySelector('div [class = "sonix8o1"]').innerText
        );
    });

    console.log(data);
})();

这是我得到的输出。我想要实现的是能够单独访问每一条数据。

你可以试试这个。

// Loads the Marketplace search page, collects the inner text of every element
// matching the listing selector, prints the resulting array, and always closes
// the browser afterwards. Errors are reported by message only.
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  // NOTE(review): this is a generated Facebook class name taken from the
  // question; presumably one match per listing — it may change over time.
  const listingSelector = 'div[class="sonix8o1"]';
  let browser;

  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });

    // 'domcontentloaded' can resolve before any matching element has been
    // rendered, so wait for at least one. If none appears within Puppeteer's
    // default timeout this rejects and is reported by the catch below.
    await page.waitForSelector(listingSelector);

    const data = await page.evaluate((selector) => {
      const cars = [];

      // querySelectorAll always returns a NodeList (empty when nothing
      // matches), so no existence check is needed before iterating.
      document.querySelectorAll(selector).forEach((element) => {
        cars.push(element.innerText);
      });

      return cars;
    }, listingSelector);

    console.log(data);
  } catch (error) {
    console.log(error.message);
  } finally {
    // browser stays undefined if launch() itself failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();

或者你也可以这样试试

// Same scrape as a plain loop: loads the Marketplace search page, reads the
// inner text of every element matching the listing selector, prints the array,
// and always closes the browser afterwards. Errors are reported by message only.
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  // NOTE(review): this is a generated Facebook class name taken from the
  // question; presumably one match per listing — it may change over time.
  const listingSelector = 'div[class="sonix8o1"]';
  let browser;

  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });

    // 'domcontentloaded' can resolve before any matching element has been
    // rendered, so wait for at least one. If none appears within Puppeteer's
    // default timeout this rejects and is reported by the catch below.
    await page.waitForSelector(listingSelector);

    const data = await page.evaluate((selector) => {
      const cars = [];

      // Query once; the NodeList is never null, only possibly empty.
      const elements = document.querySelectorAll(selector);
      for (const element of elements) {
        cars.push(element.innerText);
      }

      return cars;
    }, listingSelector);

    console.log(data);
  } catch (error) {
    console.log(error.message);
  } finally {
    // browser stays undefined if launch() itself failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();