Puppeteer 中与 Cypress 的 contains() 方法等价的实现
puppeteer equivalent of cypress contains() method
HTML 看起来像这样:
// Sample markup: three sibling sections, each with a heading and a nested link.
// Every anchor is closed with </a> so that each section holds exactly one link.
const htmlStr = `
<div>
  <div>
    <h1>title1</h1>
    <div>
      <a>click me</a>
    </div>
  </div>
  <div>
    <h1>title2</h1>
    <div>
      <a>click me</a>
    </div>
  </div>
  <div>
    <h1>title3</h1>
    <div>
      <a>click me</a>
    </div>
  </div>
</div>
`;
我想点击第一个 click me。
使用 cypress 我可以做如下事情:
// Cypress: scope to the <div> that contains "title1", then click the link inside it.
cy.contains('div', 'title1').within(() => {
  cy.get('a').click();
});
对于这个例子,有很多方法可以做到。但是思路是:找到包含文本 'title1' 的最近的 <div>,然后从它开始,在其内部查找 <a>。
在Puppeteer我想要一些如下:
// Wished-for Puppeteer API: first narrow down to the <div> containing "title1",
// then click the <a> found inside that element.
const element = await page.elementContains('div', 'title1');
await element.click('a');
如何实现elementContains()
功能,有什么想法吗?谢谢!
-----更新-----
为了更清楚,使用 elementContains()
可以:
// First section: clicks the first `click me`.
const element1 = await page.elementContains('div', 'title1');
await element1.click('a');

// Second section: clicks the second `click me`.
const element2 = await page.elementContains('div', 'title2');
await element2.click('a');

// Third section: clicks the third `click me`.
const element3 = await page.elementContains('div', 'title3');
await element3.click('a');
如果我理解正确,这些是 XPath 和选择器等价物(https://example.org/ 恰好具有类似的 DOM 结构):
'use strict';
const puppeteer = require('puppeteer');
/**
 * Demonstrates two ways to locate the link that lives in the <div> whose <h1>
 * contains a given text: an XPath query and a selector-based page function.
 * Logs a string representation of both resulting handles.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();
      await page.goto('https://example.org/');

      // XPath: any <a> below a <div> that has an <h1> child containing the text.
      const [elemByXPath] = await page.$x('//div[h1[contains(., "Example Domain")]]//a');

      // Selector-based equivalent, evaluated inside the page.
      const elemBySelector = await page.evaluateHandle(() => {
        const container = [...document.querySelectorAll('div')].find(
          div => [...div.querySelectorAll('h1')]
            .some(h1 => h1.innerText.includes('Example Domain'))
        );
        // Return null instead of throwing when no <div> matches.
        return container ? container.querySelector('a') : null;
      });

      // Destructuring an empty result leaves elemByXPath undefined, so guard it.
      console.log(elemByXPath ? elemByXPath.toString() : 'XPath: no match');
      console.log(elemBySelector.toString());
    } finally {
      // Close the browser even when a step above throws, so it is not leaked.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
您可以使用 prototype 轻松地向 Page 添加额外的功能,并使用 page.evaluateHandle 获取特定元素。
page.evaluate 和 page.evaluateHandle 之间的唯一区别是:page.evaluateHandle 返回页内对象(JSHandle)。
创建 elementContains 函数
Puppeteer 模块像这样导出 类。您可以根据需要扩展它们的功能。
// extract the Page class
const { Page } = require("puppeteer/lib/Page");
通常情况下,您创建的 page
会在原型中变为 this
。 page.evaluateHandle
将变为 this.evaluateHandle
。
/**
 * Finds an element that matches `selector` and whose text contains `text`.
 *
 * @name elementContains
 * @param {String} selector CSS selector used to collect candidates from the whole document
 * @param {String} text text the element has to contain
 * @returns {Promise} resolves to a handle for the matched element; the page
 *   function returns `null` when nothing matches
 */
Page.prototype.elementContains = function elementContains(...args) {
  return this.evaluateHandle((selector, text) => {
    // every element in the document that matches the selector
    const candidates = [...document.querySelectorAll(selector)];
    // keep only the candidates whose text contains the requested text
    const matches = candidates.filter(element => {
      // innerText is only defined on HTML elements; fall back to textContent
      // for other element types (e.g. SVG) instead of throwing on undefined
      const content = typeof element.innerText === 'string'
        ? element.innerText
        : element.textContent || '';
      return content.includes(text);
    });
    // querySelectorAll yields document order, where ancestors precede their
    // descendants; the last match therefore never contains another match and
    // is an innermost container of the text
    return matches.length > 0 ? matches[matches.length - 1] : null;
  }, ...args);
};
创建 .get 函数
既然我们得到了很棒的 elementContains
,是时候得到 get
函数了。
const { JSHandle } = require("puppeteer/lib/JSHandle");

/**
 * Replicate the .get function, scoped to this handle:
 * looks up the first descendant of the handled element that matches `selector`.
 * @param {String} selector CSS selector resolved relative to this handle's element
 * @returns {Promise} resolves to a handle for the result of `element.querySelector(selector)`
 */
JSHandle.prototype.get = function get(selector) {
  // Evaluate inside the execution context this handle belongs to, obtained
  // through the public accessor rather than the private `_context` field.
  return this.executionContext().evaluateHandle(
    (element, selector) => element.querySelector(selector),
    // pass the JSHandle itself so it arrives in the page as `element`
    this,
    selector
  );
};
享受新功能
// Demo: load the sample markup and exercise the helpers added above.
(async () => {
  try {
    const browser = await puppeteer.launch({
      headless: false
    });
    try {
      const page = await browser.newPage();
      await page.setContent(htmlStr); // the sample markup declared as `htmlStr`

      // get the element
      const elem = await page.elementContains('div', 'title1');

      // use it like any other normal element, click it, eval it, remove it etc.
      const content = await elem.$eval('h1', e => e.innerText);
      console.log(content); // prints "title1"

      // OR use the built-in scoped lookup; page.$ only takes a selector, so
      // the query has to start from the handle to stay inside this <div>
      const btn = await elem.$('a');
      await btn.click();

      // OR use our .get function to get another element
      const targetBtn = await elem.get('a');
      await targetBtn.click(); // awaited so a failed click is not silently lost
    } finally {
      // always release the browser, even if one of the steps above throws
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
结果:
HTML 看起来像这样:
// Sample markup: three sibling sections, each with a heading and a nested link.
// Every anchor is closed with </a> so that each section holds exactly one link.
const htmlStr = `
<div>
  <div>
    <h1>title1</h1>
    <div>
      <a>click me</a>
    </div>
  </div>
  <div>
    <h1>title2</h1>
    <div>
      <a>click me</a>
    </div>
  </div>
  <div>
    <h1>title3</h1>
    <div>
      <a>click me</a>
    </div>
  </div>
</div>
`;
我想点击第一个 click me。
使用 cypress 我可以做如下事情:
// Cypress: scope to the <div> that contains "title1", then click the link inside it.
cy.contains('div', 'title1').within(() => {
  cy.get('a').click();
});
对于这个例子,有很多方法可以做到。但是思路是:找到包含文本 'title1' 的最近的 <div>,然后从它开始,在其内部查找 <a>。
在Puppeteer我想要一些如下:
// Wished-for Puppeteer API: first narrow down to the <div> containing "title1",
// then click the <a> found inside that element.
const element = await page.elementContains('div', 'title1');
await element.click('a');
如何实现elementContains()
功能,有什么想法吗?谢谢!
-----更新-----
为了更清楚,使用 elementContains()
可以:
// First section: clicks the first `click me`.
const element1 = await page.elementContains('div', 'title1');
await element1.click('a');

// Second section: clicks the second `click me`.
const element2 = await page.elementContains('div', 'title2');
await element2.click('a');

// Third section: clicks the third `click me`.
const element3 = await page.elementContains('div', 'title3');
await element3.click('a');
如果我理解正确,这些是 XPath 和选择器等价物(https://example.org/ 恰好具有类似的 DOM 结构):
'use strict';
const puppeteer = require('puppeteer');
/**
 * Demonstrates two ways to locate the link that lives in the <div> whose <h1>
 * contains a given text: an XPath query and a selector-based page function.
 * Logs a string representation of both resulting handles.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();
      await page.goto('https://example.org/');

      // XPath: any <a> below a <div> that has an <h1> child containing the text.
      const [elemByXPath] = await page.$x('//div[h1[contains(., "Example Domain")]]//a');

      // Selector-based equivalent, evaluated inside the page.
      const elemBySelector = await page.evaluateHandle(() => {
        const container = [...document.querySelectorAll('div')].find(
          div => [...div.querySelectorAll('h1')]
            .some(h1 => h1.innerText.includes('Example Domain'))
        );
        // Return null instead of throwing when no <div> matches.
        return container ? container.querySelector('a') : null;
      });

      // Destructuring an empty result leaves elemByXPath undefined, so guard it.
      console.log(elemByXPath ? elemByXPath.toString() : 'XPath: no match');
      console.log(elemBySelector.toString());
    } finally {
      // Close the browser even when a step above throws, so it is not leaked.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
您可以使用 prototype 轻松地向 Page 添加额外的功能,并使用 page.evaluateHandle 获取特定元素。
page.evaluate 和 page.evaluateHandle 之间的唯一区别是:page.evaluateHandle 返回页内对象(JSHandle)。
创建 elementContains 函数
Puppeteer 模块像这样导出 类。您可以根据需要扩展它们的功能。
// extract the Page class
const { Page } = require("puppeteer/lib/Page");
通常情况下,您创建的 page
会在原型中变为 this
。 page.evaluateHandle
将变为 this.evaluateHandle
。
/**
 * Finds an element that matches `selector` and whose text contains `text`.
 *
 * @name elementContains
 * @param {String} selector CSS selector used to collect candidates from the whole document
 * @param {String} text text the element has to contain
 * @returns {Promise} resolves to a handle for the matched element; the page
 *   function returns `null` when nothing matches
 */
Page.prototype.elementContains = function elementContains(...args) {
  return this.evaluateHandle((selector, text) => {
    // every element in the document that matches the selector
    const candidates = [...document.querySelectorAll(selector)];
    // keep only the candidates whose text contains the requested text
    const matches = candidates.filter(element => {
      // innerText is only defined on HTML elements; fall back to textContent
      // for other element types (e.g. SVG) instead of throwing on undefined
      const content = typeof element.innerText === 'string'
        ? element.innerText
        : element.textContent || '';
      return content.includes(text);
    });
    // querySelectorAll yields document order, where ancestors precede their
    // descendants; the last match therefore never contains another match and
    // is an innermost container of the text
    return matches.length > 0 ? matches[matches.length - 1] : null;
  }, ...args);
};
创建 .get 函数
既然我们得到了很棒的 elementContains
,是时候得到 get
函数了。
const { JSHandle } = require("puppeteer/lib/JSHandle");

/**
 * Replicate the .get function, scoped to this handle:
 * looks up the first descendant of the handled element that matches `selector`.
 * @param {String} selector CSS selector resolved relative to this handle's element
 * @returns {Promise} resolves to a handle for the result of `element.querySelector(selector)`
 */
JSHandle.prototype.get = function get(selector) {
  // Evaluate inside the execution context this handle belongs to, obtained
  // through the public accessor rather than the private `_context` field.
  return this.executionContext().evaluateHandle(
    (element, selector) => element.querySelector(selector),
    // pass the JSHandle itself so it arrives in the page as `element`
    this,
    selector
  );
};
享受新功能
// Demo: load the sample markup and exercise the helpers added above.
(async () => {
  try {
    const browser = await puppeteer.launch({
      headless: false
    });
    try {
      const page = await browser.newPage();
      await page.setContent(htmlStr); // the sample markup declared as `htmlStr`

      // get the element
      const elem = await page.elementContains('div', 'title1');

      // use it like any other normal element, click it, eval it, remove it etc.
      const content = await elem.$eval('h1', e => e.innerText);
      console.log(content); // prints "title1"

      // OR use the built-in scoped lookup; page.$ only takes a selector, so
      // the query has to start from the handle to stay inside this <div>
      const btn = await elem.$('a');
      await btn.click();

      // OR use our .get function to get another element
      const targetBtn = await elem.get('a');
      await targetBtn.click(); // awaited so a failed click is not silently lost
    } finally {
      // always release the browser, even if one of the steps above throws
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
结果: