使用 Puppeteer 从 FlashScore 中提取所有 id
Extract all ids with Puppeteer from FlashScore
我需要从 https://www.flashscore.es/ 中提取所有 div。
使用以下代码,我可以打印我需要的所有元素:
const puppeteer = require('puppeteer');
const fs = require('fs');
const list = [];
// Opens the FlashScore basketball page in a visible browser window and logs
// the element handles of every <div> whose id starts with "g_3_".
(async () => {
  const browser = await puppeteer.launch({ headless: false, defaultViewport: { width: 1920, height: 1080 }, args: ['--start-maximized'] });
  try {
    const page = await browser.newPage();
    await page.goto(`https://www.flashscore.es/baloncesto/`);
    // Fixed pause before querying, so the match rows have time to appear.
    await delay(4000);
    const element = await page.$x(`//div[starts-with(@id,'g_3_')]`);
    console.log(element);
  } finally {
    // Always release the browser, even when navigation or the query throws;
    // otherwise the browser process stays alive and keeps Node from exiting.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
/**
 * Returns a promise that settles once the given number of milliseconds
 * has elapsed.
 *
 * @param {number} time - How long to wait, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) when the timer fires.
 */
function delay(time) {
  const armTimer = (done) => {
    setTimeout(done, time);
  };
  return new Promise(armTimer);
}
元素的 HTML 看起来像这样:
<div id="g_3_Qyk44tlB" title="¡Haga click para detalles del partido!" elementtiming="SpeedCurveFRP" class="event__match event__match--live event__match--twoLine">
如何获取所需元素的id?
您已经获得了所需的元素,现在让我们遍历所有元素并提取“id”属性:
const puppeteer = require('puppeteer');
// Opens the FlashScore basketball page, waits for the match rows to appear
// and prints the id of every <div> whose id starts with "g_3_".
(async () => {
  const browser = await puppeteer.launch({ headless: false, defaultViewport: { width: 1920, height: 1080 }, args: ['--start-maximized'] });
  try {
    const page = await browser.newPage();
    await page.goto(`https://www.flashscore.es/baloncesto/`);
    // One selector is used for both waiting and extraction, so both steps
    // target exactly the same elements.
    const matchSelector = 'div[id^="g_3_"]';
    // More reliable than waiting for an arbitrary number of seconds.
    await page.waitForSelector(matchSelector);
    // Read all ids inside the page in a single evaluation: one round trip,
    // and no element handles are created that would need disposing.
    const ids = await page.$$eval(matchSelector, (nodes) => nodes.map((node) => node.id));
    console.log(ids);
  } finally {
    // Runs on success and on failure, so the browser (and its pages) is
    // always shut down.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
结果:
P.S. Puppeteer 中有一个用于等待的内置函数:
// Built-in alternative to a hand-written delay helper: wait 4000 ms on the page.
// NOTE(review): page.waitForTimeout is believed to be deprecated and removed in
// recent Puppeteer majors — confirm it exists in the installed version.
await page.waitForTimeout(4000);
我需要从 https://www.flashscore.es/ 中提取所有 div。 使用以下代码,我可以打印我需要的所有元素:
const puppeteer = require('puppeteer');
const fs = require('fs');
const list = [];
// Opens the FlashScore basketball page in a visible browser window and logs
// the element handles of every <div> whose id starts with "g_3_".
(async () => {
  const browser = await puppeteer.launch({ headless: false, defaultViewport: { width: 1920, height: 1080 }, args: ['--start-maximized'] });
  try {
    const page = await browser.newPage();
    await page.goto(`https://www.flashscore.es/baloncesto/`);
    // Fixed pause before querying, so the match rows have time to appear.
    await delay(4000);
    const element = await page.$x(`//div[starts-with(@id,'g_3_')]`);
    console.log(element);
  } finally {
    // Always release the browser, even when navigation or the query throws;
    // otherwise the browser process stays alive and keeps Node from exiting.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
/**
 * Returns a promise that settles once the given number of milliseconds
 * has elapsed.
 *
 * @param {number} time - How long to wait, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) when the timer fires.
 */
function delay(time) {
  const armTimer = (done) => {
    setTimeout(done, time);
  };
  return new Promise(armTimer);
}
元素的 HTML 看起来像这样:
<div id="g_3_Qyk44tlB" title="¡Haga click para detalles del partido!" elementtiming="SpeedCurveFRP" class="event__match event__match--live event__match--twoLine">
如何获取所需元素的id?
您已经获得了所需的元素,现在让我们遍历所有元素并提取“id”属性:
const puppeteer = require('puppeteer');
// Opens the FlashScore basketball page, waits for the match rows to appear
// and prints the id of every <div> whose id starts with "g_3_".
(async () => {
  const browser = await puppeteer.launch({ headless: false, defaultViewport: { width: 1920, height: 1080 }, args: ['--start-maximized'] });
  try {
    const page = await browser.newPage();
    await page.goto(`https://www.flashscore.es/baloncesto/`);
    // One selector is used for both waiting and extraction, so both steps
    // target exactly the same elements.
    const matchSelector = 'div[id^="g_3_"]';
    // More reliable than waiting for an arbitrary number of seconds.
    await page.waitForSelector(matchSelector);
    // Read all ids inside the page in a single evaluation: one round trip,
    // and no element handles are created that would need disposing.
    const ids = await page.$$eval(matchSelector, (nodes) => nodes.map((node) => node.id));
    console.log(ids);
  } finally {
    // Runs on success and on failure, so the browser (and its pages) is
    // always shut down.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
结果:
P.S. Puppeteer 中有一个用于等待的内置函数:
// Built-in alternative to a hand-written delay helper: wait 4000 ms on the page.
// NOTE(review): page.waitForTimeout is believed to be deprecated and removed in
// recent Puppeteer majors — confirm it exists in the installed version.
await page.waitForTimeout(4000);