Puppeteer: page.evaluate throws TypeError: cannot read innerText of null
Puppeteer: page.evaluate throws TypeError: cannot read innerText of null
我正在尝试学习 Puppeteer,想抓取这个 MMA 网页:https://www.tapology.com/regions 。
我想抓取所有区域标题,例如 "US MIDWEST, US NORTHEAST, US SOUTHWEST... etc"
我已经尝试使用 await page.waitForSelector 来等待选择器加载,但页面只是挂起。
我也尝试过使用 innerHTML,但结果相同。
尝试使用 page.$$eval(selector, pageFunction[, ...args]),但返回的是空数组。
尝试使用 Google 开发工具中的精确 CSS 选择器,但没有成功。尝试了 CSS 选择的各种组合,但我仍然无法抓取 h4 的文本。
// Question snippet: attempts to read the text of the first region heading
// link with Puppeteer and log it.
//
// NOTE(review): as posted, this snippet is not syntactically complete — the
// `(async ()=>{` opened inside the function has no matching `})()`, so the
// braces do not balance. It is kept verbatim because it is the failing
// example the question is about.
const puppeteer = require('puppeteer');
const REGIONS_URL = 'https://www.tapology.com/regions';
/**
 * Launches a browser, opens a new page, evaluates a selector in the page and
 * logs the matched element's innerText, then closes the browser.
 *
 * @param {string} url - Page to scrape. NOTE(review): `url` is never used in
 *   the visible body; no `page.goto(url)` call appears before the evaluate,
 *   so the selector is presumably run against the initial blank page — this
 *   would explain `querySelector` returning null.
 */
async function getRegionsNames(url) {
const browser= await puppeteer.launch();
const page= await browser.newPage();
// NOTE(review): this async arrow function is opened but never closed/invoked
// in the snippet as posted.
await (async ()=>{
// NOTE(review): the literal contains a space inside `nth- child`, so it is not
// the `:nth-child(1)` pseudo-class — likely a typo or paste artifact.
const MIDWEST_SELECTOR = "#content > div.regionIndex > h4:nth- child(1) > a";
//await page.waitForSelector(MIDWEST_SELECTOR); //hangs if used
// The callback runs in the page context; `sel` receives MIDWEST_SELECTOR.
const mid = await page.evaluate((sel)=>{ //trying to grab 'MIDWEST'
// Dereferences the querySelector result without a null check.
return document.querySelector(sel).innerText;
},MIDWEST_SELECTOR);
console.log(mid); //throws error
// Only reached when the evaluate above succeeds; there is no try/finally, so
// the browser is left open when it throws.
await browser.close();
}
// The returned promise is neither awaited nor given a .catch handler, so any
// rejection inside getRegionsNames is unhandled.
getRegionsNames(REGIONS_URL);
(node:23646) UnhandledPromiseRejectionWarning: Error: Evaluation failed: TypeError: Cannot read property 'innerText' of null
试试这个:
/**
 * GET /testing — scrapes the region heading links from the Tapology regions
 * page and responds with an array of `{ title, link }` objects.
 *
 * `title` is the link's text content (trimmed) and `link` is the raw `href`
 * attribute, e.g. `{ title: "US Midwest", link: "/regions/us-midwest" }`.
 *
 * On any failure the error is logged and a 500 response is sent, so the
 * request never hangs. The browser is always closed, even when navigation or
 * evaluation throws.
 *
 * @param {object} req - Express request (unused).
 * @param {object} res - Express response used to send the scraped data.
 */
app.get('/testing', async function (req, res) {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true
    });
    const page = await browser.newPage();
    // Navigate first: without goto() the selectors run against a blank page.
    await page.goto('https://www.tapology.com/regions', { waitUntil: 'domcontentloaded' });
    // Runs in the page context; only serializable data is returned to Node.
    const scrapedData = await page.evaluate(() =>
      Array.from(document.querySelectorAll('h4 a'))
        .map((link) => ({
          // textContent yields the plain title rather than markup or
          // entity-escaped text, which innerHTML would return.
          title: link.textContent.trim(),
          link: link.getAttribute('href')
        }))
    );
    console.log('scrapedData', scrapedData);
    return res.send(scrapedData);
  } catch (err) {
    console.error('Failed to scrape regions', err);
    // Guard against a second response if the failure happened after sending.
    if (!res.headersSent) {
      res.status(500).send({ error: 'Failed to scrape regions' });
    }
    return undefined;
  } finally {
    // browser.close() also closes every open page; a failure while closing is
    // logged so it cannot surface as an unhandled rejection.
    if (browser) {
      await browser.close().catch((closeErr) => {
        console.error('Failed to close browser', closeErr);
      });
    }
  }
});
您将获得:
[
{
title: "US Midwest",
link: "/regions/us-midwest"
},
{
title: "US Northeast",
link: "/regions/us-northeast"
},
{
title: "US Southeast",
link: "/regions/us-southeast"
},
{
title: "US Southwest",
link: "/regions/us-southwest"
},
{
title: "US West",
link: "/regions/us-west"
},
{
title: "Asia Central",
link: "/regions/central-asia"
},
{
title: "Canada",
link: "/regions/canada"
},
{
title: "Europe Balkans",
link: "/regions/europe-balkans"
},
{
title: "Europe Eastern",
link: "/regions/europe-eastern"
},
{
title: "Europe Western",
link: "/regions/western-europe"
},
{
title: "Latin America",
link: "/regions/latin-america"
},
{
title: "Middle East",
link: "/regions/middle-east"
}
]
我正在尝试学习 Puppeteer,想抓取这个 MMA 网页:https://www.tapology.com/regions 。
我想抓取所有区域标题,例如 "US MIDWEST, US NORTHEAST, US SOUTHWEST... etc"
我已经尝试使用 await page.waitForSelector 来等待选择器加载,但页面只是挂起。
我也尝试过使用 innerHTML,但结果相同。
尝试使用 page.$$eval(selector, pageFunction[, ...args]),但返回的是空数组。
尝试使用 Google 开发工具中的精确 CSS 选择器,但没有成功。尝试了 CSS 选择的各种组合,但我仍然无法抓取 h4 的文本。
// Question snippet: attempts to read the text of the first region heading
// link with Puppeteer and log it.
//
// NOTE(review): as posted, this snippet is not syntactically complete — the
// `(async ()=>{` opened inside the function has no matching `})()`, so the
// braces do not balance. It is kept verbatim because it is the failing
// example the question is about.
const puppeteer = require('puppeteer');
const REGIONS_URL = 'https://www.tapology.com/regions';
/**
 * Launches a browser, opens a new page, evaluates a selector in the page and
 * logs the matched element's innerText, then closes the browser.
 *
 * @param {string} url - Page to scrape. NOTE(review): `url` is never used in
 *   the visible body; no `page.goto(url)` call appears before the evaluate,
 *   so the selector is presumably run against the initial blank page — this
 *   would explain `querySelector` returning null.
 */
async function getRegionsNames(url) {
const browser= await puppeteer.launch();
const page= await browser.newPage();
// NOTE(review): this async arrow function is opened but never closed/invoked
// in the snippet as posted.
await (async ()=>{
// NOTE(review): the literal contains a space inside `nth- child`, so it is not
// the `:nth-child(1)` pseudo-class — likely a typo or paste artifact.
const MIDWEST_SELECTOR = "#content > div.regionIndex > h4:nth- child(1) > a";
//await page.waitForSelector(MIDWEST_SELECTOR); //hangs if used
// The callback runs in the page context; `sel` receives MIDWEST_SELECTOR.
const mid = await page.evaluate((sel)=>{ //trying to grab 'MIDWEST'
// Dereferences the querySelector result without a null check.
return document.querySelector(sel).innerText;
},MIDWEST_SELECTOR);
console.log(mid); //throws error
// Only reached when the evaluate above succeeds; there is no try/finally, so
// the browser is left open when it throws.
await browser.close();
}
// The returned promise is neither awaited nor given a .catch handler, so any
// rejection inside getRegionsNames is unhandled.
getRegionsNames(REGIONS_URL);
(node:23646) UnhandledPromiseRejectionWarning: Error: Evaluation failed: TypeError: Cannot read property 'innerText' of null
试试这个:
/**
 * GET /testing — scrapes the region heading links from the Tapology regions
 * page and responds with an array of `{ title, link }` objects.
 *
 * `title` is the link's text content (trimmed) and `link` is the raw `href`
 * attribute, e.g. `{ title: "US Midwest", link: "/regions/us-midwest" }`.
 *
 * On any failure the error is logged and a 500 response is sent, so the
 * request never hangs. The browser is always closed, even when navigation or
 * evaluation throws.
 *
 * @param {object} req - Express request (unused).
 * @param {object} res - Express response used to send the scraped data.
 */
app.get('/testing', async function (req, res) {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true
    });
    const page = await browser.newPage();
    // Navigate first: without goto() the selectors run against a blank page.
    await page.goto('https://www.tapology.com/regions', { waitUntil: 'domcontentloaded' });
    // Runs in the page context; only serializable data is returned to Node.
    const scrapedData = await page.evaluate(() =>
      Array.from(document.querySelectorAll('h4 a'))
        .map((link) => ({
          // textContent yields the plain title rather than markup or
          // entity-escaped text, which innerHTML would return.
          title: link.textContent.trim(),
          link: link.getAttribute('href')
        }))
    );
    console.log('scrapedData', scrapedData);
    return res.send(scrapedData);
  } catch (err) {
    console.error('Failed to scrape regions', err);
    // Guard against a second response if the failure happened after sending.
    if (!res.headersSent) {
      res.status(500).send({ error: 'Failed to scrape regions' });
    }
    return undefined;
  } finally {
    // browser.close() also closes every open page; a failure while closing is
    // logged so it cannot surface as an unhandled rejection.
    if (browser) {
      await browser.close().catch((closeErr) => {
        console.error('Failed to close browser', closeErr);
      });
    }
  }
});
您将获得:
[
{
title: "US Midwest",
link: "/regions/us-midwest"
},
{
title: "US Northeast",
link: "/regions/us-northeast"
},
{
title: "US Southeast",
link: "/regions/us-southeast"
},
{
title: "US Southwest",
link: "/regions/us-southwest"
},
{
title: "US West",
link: "/regions/us-west"
},
{
title: "Asia Central",
link: "/regions/central-asia"
},
{
title: "Canada",
link: "/regions/canada"
},
{
title: "Europe Balkans",
link: "/regions/europe-balkans"
},
{
title: "Europe Eastern",
link: "/regions/europe-eastern"
},
{
title: "Europe Western",
link: "/regions/western-europe"
},
{
title: "Latin America",
link: "/regions/latin-america"
},
{
title: "Middle East",
link: "/regions/middle-east"
}
]