使用 Puppeteer 从 FlashScore 中提取所有 id

Extract all ids with Puppeteer from FlashScore

我需要从 https://www.flashscore.es/ 中提取所有 div。 使用以下代码,我可以打印我需要的所有元素:

const puppeteer = require('puppeteer');
const fs = require('fs');
const list = [];

// Entry point: open the basketball page, pause, then log the matched element handles.
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { width: 1920, height: 1080 },
    args: ['--start-maximized'],
  });
  const page = await browser.newPage();
  await page.goto(`https://www.flashscore.es/baloncesto/`);

  // Fixed 4-second pause before querying the page.
  await delay(4000);

  // Collect every <div> whose id starts with "g_3_" and print the resulting handles.
  const matchHandles = await page.$x(`//div[starts-with(@id,'g_3_')]`);
  console.log(matchHandles);
})();

/**
 * Returns a promise that settles after a timer of `time` milliseconds fires.
 *
 * @param {number} time - Timer duration passed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function delay(time) {
  return new Promise((wake) => {
    setTimeout(wake, time);
  });
}

元素的 HTML 看起来像这样:

<div id="g_3_Qyk44tlB" title="¡Haga click para detalles del partido!" elementtiming="SpeedCurveFRP" class="event__match event__match--live event__match--twoLine">

如何获取所需元素的id?

您已经获得了所需的元素,现在让我们遍历所有元素并提取“id”属性:

const puppeteer = require('puppeteer');

/**
 * Opens the flashscore basketball page and prints the `id` of every
 * `<div>` whose id starts with "g_3_".
 */
(async () => {
  const browser = await puppeteer.launch({ headless: false, defaultViewport: { width: 1920, height: 1080 }, args: ['--start-maximized'] });

  // try/finally guarantees the browser is closed even when navigation or the
  // selector wait throws; otherwise the headful browser would be left running.
  try {
    const page = await browser.newPage();
    await page.goto(`https://www.flashscore.es/baloncesto/`);

    // One selector for both the wait and the query, so they cannot disagree.
    const matchSelector = 'div[id^="g_3_"]';

    // More reliable than waiting for an arbitrary number of seconds.
    await page.waitForSelector(matchSelector);

    // Read all ids inside the page in a single round trip instead of
    // fetching a property handle per element.
    const ids = await page.$$eval(matchSelector, (divs) => divs.map((div) => div.id));
    console.log(ids);
  } finally {
    // Closing the browser also closes every page it owns.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly rather than leaving a floating rejection.
  console.error(err);
  process.exitCode = 1;
});

结果:

P.S. Puppeteer 内置了一个用于等待的函数:

// Pauses for 4000 ms using Puppeteer's own wait helper on the page object.
// NOTE(review): this helper is presumably deprecated/unavailable in newer
// Puppeteer releases — confirm it exists in the installed version.
await page.waitForTimeout(4000);