Puppeteer 问题:如何遍历元素句柄?
Puppeteer question: How to loop through an ElementHandle?
我有一个函数可以从 CSS 或 XPath 选择器中提取 textContent。
有人可以帮我实现这个吗?我很困惑
// Grab an element handle for every match of the outer selector.
const nodes = await page.$$('css selector here');
for (const handle of nodes) {
  // Extract the text for this handle; any falsy result is normalised to null.
  const extracted = await extractText(page, handle, 'css selector to extract text');
  const stuff = extracted || null;
}
所以只有当节点有指定的选择器时才会提取文本,否则输出为null。
样本HTML:
<ul>
<!-- Items that should be evaluated -->
<li class="red">Color Red 1</li> <!-- Evaluated, but no text should be extracted: expected result is null -->
<li class="red">Color Red 2</li> <!-- Evaluated, but no text should be extracted: expected result is null -->
<li class="blue">Color Blue 1</li> <!-- Evaluated, and its text should be extracted -->
<li class="blue">Color Blue 2</li> <!-- Evaluated, and its text should be extracted -->
<!-- Items that should not be evaluated at all -->
<li class="green">Color Green 1</li>
<li class="green">Color Green 2</li>
<li class="yellow">Color Yellow 1</li>
<li class="yellow">Color Yellow 2</li>
</ul>
Puppeteer 代码:
const puppeteer = require('puppeteer');
/**
 * Returns the rendered text of `node` when the node itself matches `selector`,
 * otherwise null.
 *
 * Both the match test and the text read run against the handle that was passed
 * in, so every node yields its own text (a page-wide lookup by selector would
 * always return the first matching element instead). `Element.matches` is used
 * for the test, so multi-class elements and arbitrary CSS selectors work too.
 *
 * @param {import('puppeteer').Page} page - Page that owns `node`.
 * @param {import('puppeteer').ElementHandle} node - Element handle to inspect.
 * @param {string} selector - CSS selector the node must match.
 * @returns {Promise<string|null>} The node's innerText, or null when it does not match.
 */
const extractText = async (page, node, selector) => {
  // Element handles passed as arguments are resolved to the live DOM element
  // inside the page context.
  const text = await page.evaluate(
    (element, sel) => (element.matches(sel) ? element.innerText : null),
    node,
    selector,
  );
  if (text !== null) {
    console.log(text);
  }
  return text;
};
// Entry point: opens the target page, evaluates every '.red' / '.blue' node and
// prints an array holding the extracted text (or null) for each of them.
(async () => {
  // Replace with the address of the page to scrape.
  const url = 'Your URL';
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    // Navigate to the configured address (the variable, not the literal string 'url').
    await page.goto(url);
    // Pass the selectors of Nodes you would like to be evaluated
    // separated by a comma
    const nodes = await page.$$('.red, .blue');
    const result = [];
    for (const node of nodes) {
      // Nodes that do not match '.blue' contribute null.
      const stuff = await extractText(page, node, '.blue');
      result.push(stuff);
    }
    console.log(result);
  } finally {
    // Close the browser even when navigation or extraction throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving the promise floating.
  console.error(err);
  process.exitCode = 1;
});
如果我没理解错的话,你可以这样试试:
import puppeteer from 'puppeteer';
// Start a browser instance for this self-contained demo.
const browser = await puppeteer.launch();

// Two paragraphs: only the second one contains a <span>.
const html = `
<!doctype html>
<html>
<head><meta charset='UTF-8'><title>Test</title></head>
<body>
<p>Text 1.</p>
<p>Text <span>2</span>.</p>
</body>
</html>`;

try {
  // Reuse the tab the browser opened on launch and load the document as a data URL.
  const [page] = await browser.pages();
  const dataUrl = `data:text/html,${encodeURIComponent(html)}`;
  await page.goto(dataUrl);

  const paragraphs = await page.$$('p');
  for (const paragraph of paragraphs) {
    // Any falsy extraction result is reported as null.
    const extracted = await extractText(page, paragraph, 'span');
    const stuff = extracted || null;
    console.log(stuff);
  }
} catch (err) {
  console.error(err);
} finally {
  // Always shut the browser down, whether or not an error occurred.
  await browser.close();
}
/**
 * Looks up the first descendant of `node` matching `selector` inside the page
 * context and resolves to its innerText, or to null when there is no such
 * descendant (or it has no innerText value).
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)`.
 * @param {object} node - Element handle used as the search root.
 * @param {string} selector - CSS selector for the descendant to read.
 * @returns {Promise<string|null>} Whatever `page.evaluate` resolves to.
 */
function extractText(page, node, selector) {
  // Runs in the page: `root` is the element behind the handle.
  const readDescendantText = (root, childSelector) => {
    const match = root.querySelector(childSelector);
    return match?.innerText ?? null;
  };
  return page.evaluate(readDescendantText, node, selector);
}
我有一个函数可以从 CSS 或 XPath 选择器中提取 textContent。
有人可以帮我实现这个吗?我很困惑
// Grab an element handle for every match of the outer selector.
const nodes = await page.$$('css selector here');
for (const handle of nodes) {
  // Extract the text for this handle; any falsy result is normalised to null.
  const extracted = await extractText(page, handle, 'css selector to extract text');
  const stuff = extracted || null;
}
所以只有当节点有指定的选择器时才会提取文本,否则输出为null。
样本HTML:
<ul>
<!-- Items that should be evaluated -->
<li class="red">Color Red 1</li> <!-- Evaluated, but no text should be extracted: expected result is null -->
<li class="red">Color Red 2</li> <!-- Evaluated, but no text should be extracted: expected result is null -->
<li class="blue">Color Blue 1</li> <!-- Evaluated, and its text should be extracted -->
<li class="blue">Color Blue 2</li> <!-- Evaluated, and its text should be extracted -->
<!-- Items that should not be evaluated at all -->
<li class="green">Color Green 1</li>
<li class="green">Color Green 2</li>
<li class="yellow">Color Yellow 1</li>
<li class="yellow">Color Yellow 2</li>
</ul>
Puppeteer 代码:
const puppeteer = require('puppeteer');
/**
 * Returns the rendered text of `node` when the node itself matches `selector`,
 * otherwise null.
 *
 * Both the match test and the text read run against the handle that was passed
 * in, so every node yields its own text (a page-wide lookup by selector would
 * always return the first matching element instead). `Element.matches` is used
 * for the test, so multi-class elements and arbitrary CSS selectors work too.
 *
 * @param {import('puppeteer').Page} page - Page that owns `node`.
 * @param {import('puppeteer').ElementHandle} node - Element handle to inspect.
 * @param {string} selector - CSS selector the node must match.
 * @returns {Promise<string|null>} The node's innerText, or null when it does not match.
 */
const extractText = async (page, node, selector) => {
  // Element handles passed as arguments are resolved to the live DOM element
  // inside the page context.
  const text = await page.evaluate(
    (element, sel) => (element.matches(sel) ? element.innerText : null),
    node,
    selector,
  );
  if (text !== null) {
    console.log(text);
  }
  return text;
};
// Entry point: opens the target page, evaluates every '.red' / '.blue' node and
// prints an array holding the extracted text (or null) for each of them.
(async () => {
  // Replace with the address of the page to scrape.
  const url = 'Your URL';
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    // Navigate to the configured address (the variable, not the literal string 'url').
    await page.goto(url);
    // Pass the selectors of Nodes you would like to be evaluated
    // separated by a comma
    const nodes = await page.$$('.red, .blue');
    const result = [];
    for (const node of nodes) {
      // Nodes that do not match '.blue' contribute null.
      const stuff = await extractText(page, node, '.blue');
      result.push(stuff);
    }
    console.log(result);
  } finally {
    // Close the browser even when navigation or extraction throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving the promise floating.
  console.error(err);
  process.exitCode = 1;
});
如果我没理解错的话,你可以这样试试:
import puppeteer from 'puppeteer';
// Start a browser instance for this self-contained demo.
const browser = await puppeteer.launch();

// Two paragraphs: only the second one contains a <span>.
const html = `
<!doctype html>
<html>
<head><meta charset='UTF-8'><title>Test</title></head>
<body>
<p>Text 1.</p>
<p>Text <span>2</span>.</p>
</body>
</html>`;

try {
  // Reuse the tab the browser opened on launch and load the document as a data URL.
  const [page] = await browser.pages();
  const dataUrl = `data:text/html,${encodeURIComponent(html)}`;
  await page.goto(dataUrl);

  const paragraphs = await page.$$('p');
  for (const paragraph of paragraphs) {
    // Any falsy extraction result is reported as null.
    const extracted = await extractText(page, paragraph, 'span');
    const stuff = extracted || null;
    console.log(stuff);
  }
} catch (err) {
  console.error(err);
} finally {
  // Always shut the browser down, whether or not an error occurred.
  await browser.close();
}
/**
 * Looks up the first descendant of `node` matching `selector` inside the page
 * context and resolves to its innerText, or to null when there is no such
 * descendant (or it has no innerText value).
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)`.
 * @param {object} node - Element handle used as the search root.
 * @param {string} selector - CSS selector for the descendant to read.
 * @returns {Promise<string|null>} Whatever `page.evaluate` resolves to.
 */
function extractText(page, node, selector) {
  // Runs in the page: `root` is the element behind the handle.
  const readDescendantText = (root, childSelector) => {
    const match = root.querySelector(childSelector);
    return match?.innerText ?? null;
  };
  return page.evaluate(readDescendantText, node, selector);
}