Puppeteer 从 elementHandle 获取元素导致协议错误
Puppeteer getting element from elementHandle causing protocol error
我正在尝试抓取某个 Facebook 页面,以获取其由某个用户撰写并以某个词开头的帖子。
const puppeteer = require('puppeteer');
/**
 * Intended to return the '.userContentWrapper' posts whose '.fwb' text equals
 * USERNAME and whose post message matches the "[test <digits>]" pattern.
 *
 * NOTE(review): the callback given to filter() is async, so filter() receives a
 * Promise from it instead of a boolean. A Promise is always truthy, which means
 * filter() keeps every post and returns before any of the awaited lookups in
 * the callback have completed.
 *
 * @param {object} page - Object providing $$() and evaluate(); presumably a
 *   Puppeteer Page (see the caller).
 * @returns {Promise<Array>} The array produced by filter() over the matched
 *   element handles.
 */
async function findPosts(page) {
const USERNAME = 'test123';
const posts = await page.$$('.userContentWrapper');
// filter() does not await this async predicate; see the note above.
return posts.filter(async post => {
try {
let usernameElement = await post.$('.fwb');
let username = await page.evaluate(element => element.textContent, usernameElement);
if (username === USERNAME) {
let postElement = await post.$('[data-testid="post_message"] p');
// NOTE(review): page.evaluate() is not awaited here, so postContent is a
// Promise and the regex below is tested against it rather than the text.
let postContent = page.evaluate(element => element.textContent, postElement);
return /\[test \d+\]/.test(postContent);
}
return false;
} catch(e) {
// Any failure while inspecting a post is logged and treated as "no match".
console.log(e);
return false;
}
});
}
// Entry point: open the group page, print its title and the posts returned by
// findPosts(), then shut the browser down.
(async () => {
  const browser = await puppeteer.launch({
    headless: false
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.facebook.com/groups/groupid/');
    const pageTitle = await page.title();
    console.log(pageTitle);
    const posts = await findPosts(page);
    console.log(posts);
  } finally {
    // Close the browser even when navigation or scraping throws, so that no
    // browser process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Do not leave the promise floating: report the failure and signal it
  // through the exit code.
  console.error(err);
  process.exitCode = 1;
});
当我尝试获取 usernameElement 时,出现以下错误:
Error: Protocol error (Runtime.callFunctionOn): Target closed.
错误发生在这一行:
let usernameElement = await post.$('.fwb');
不确定这里出了什么问题,有什么建议吗?
问题在于 filter 函数不支持 Promise:async 回调返回的是一个 Promise,而 Promise 总是真值,所以 filter 不会等待回调完成。因此 return posts.filter(...) 会立即返回,随后浏览器就被关闭了。当回调稍后在页面上运行 $ 函数时,该页面已不存在,于是出现 Target closed 错误。
要使其使用 async/await 语法,您可以使用一个简单的循环:
/**
 * Collects the posts whose author is USERNAME and whose message matches the
 * "[test <digits>]" pattern.
 *
 * Array.prototype.filter cannot be used with an async predicate: it receives a
 * Promise (always truthy) for every item and returns right away. A for...of
 * loop lets each lookup be awaited before this function resolves.
 *
 * @param {object} page - Puppeteer page with the group feed loaded; must
 *   provide $$() and evaluate().
 * @returns {Promise<Array>} Element handles of the matching posts, in page order.
 */
async function findPosts(page) {
  const USERNAME = 'test123';
  const posts = await page.$$('.userContentWrapper');
  const postsToReturn = [];
  for (const post of posts) {
    try {
      const usernameElement = await post.$('.fwb');
      // $() resolves to null when nothing matches; skip posts without an author.
      if (!usernameElement) continue;
      const username = await page.evaluate(element => element.textContent, usernameElement);
      if (username !== USERNAME) continue;
      const postElement = await post.$('[data-testid="post_message"] p');
      if (!postElement) continue;
      // The await matters: without it the regex would be tested against a
      // Promise instead of the post text.
      const postContent = await page.evaluate(element => element.textContent, postElement);
      if (/\[test \d+\]/.test(postContent)) {
        postsToReturn.push(post); // instead of return true
      }
    } catch (e) {
      // Best effort: a post that cannot be inspected is logged and skipped.
      console.log(e);
    }
  }
  return postsToReturn;
}
我正在尝试抓取某个 Facebook 页面,以获取其由某个用户撰写并以某个词开头的帖子。
const puppeteer = require('puppeteer');
/**
 * Intended to return the '.userContentWrapper' posts whose '.fwb' text equals
 * USERNAME and whose post message matches the "[test <digits>]" pattern.
 *
 * NOTE(review): the callback given to filter() is async, so filter() receives a
 * Promise from it instead of a boolean. A Promise is always truthy, which means
 * filter() keeps every post and returns before any of the awaited lookups in
 * the callback have completed.
 *
 * @param {object} page - Object providing $$() and evaluate(); presumably a
 *   Puppeteer Page (see the caller).
 * @returns {Promise<Array>} The array produced by filter() over the matched
 *   element handles.
 */
async function findPosts(page) {
const USERNAME = 'test123';
const posts = await page.$$('.userContentWrapper');
// filter() does not await this async predicate; see the note above.
return posts.filter(async post => {
try {
let usernameElement = await post.$('.fwb');
let username = await page.evaluate(element => element.textContent, usernameElement);
if (username === USERNAME) {
let postElement = await post.$('[data-testid="post_message"] p');
// NOTE(review): page.evaluate() is not awaited here, so postContent is a
// Promise and the regex below is tested against it rather than the text.
let postContent = page.evaluate(element => element.textContent, postElement);
return /\[test \d+\]/.test(postContent);
}
return false;
} catch(e) {
// Any failure while inspecting a post is logged and treated as "no match".
console.log(e);
return false;
}
});
}
// Entry point: open the group page, print its title and the posts returned by
// findPosts(), then shut the browser down.
(async () => {
  const browser = await puppeteer.launch({
    headless: false
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.facebook.com/groups/groupid/');
    const pageTitle = await page.title();
    console.log(pageTitle);
    const posts = await findPosts(page);
    console.log(posts);
  } finally {
    // Close the browser even when navigation or scraping throws, so that no
    // browser process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Do not leave the promise floating: report the failure and signal it
  // through the exit code.
  console.error(err);
  process.exitCode = 1;
});
当我尝试获取 usernameElement 时,出现以下错误:
Error: Protocol error (Runtime.callFunctionOn): Target closed.
错误发生在这一行:
let usernameElement = await post.$('.fwb');
不确定这里出了什么问题,有什么建议吗?
问题在于 filter 函数不支持 Promise:async 回调返回的是一个 Promise,而 Promise 总是真值,所以 filter 不会等待回调完成。因此 return posts.filter(...) 会立即返回,随后浏览器就被关闭了。当回调稍后在页面上运行 $ 函数时,该页面已不存在,于是出现 Target closed 错误。
要使其使用 async/await 语法,您可以使用一个简单的循环:
/**
 * Collects the posts whose author is USERNAME and whose message matches the
 * "[test <digits>]" pattern.
 *
 * Array.prototype.filter cannot be used with an async predicate: it receives a
 * Promise (always truthy) for every item and returns right away. A for...of
 * loop lets each lookup be awaited before this function resolves.
 *
 * @param {object} page - Puppeteer page with the group feed loaded; must
 *   provide $$() and evaluate().
 * @returns {Promise<Array>} Element handles of the matching posts, in page order.
 */
async function findPosts(page) {
  const USERNAME = 'test123';
  const posts = await page.$$('.userContentWrapper');
  const postsToReturn = [];
  for (const post of posts) {
    try {
      const usernameElement = await post.$('.fwb');
      // $() resolves to null when nothing matches; skip posts without an author.
      if (!usernameElement) continue;
      const username = await page.evaluate(element => element.textContent, usernameElement);
      if (username !== USERNAME) continue;
      const postElement = await post.$('[data-testid="post_message"] p');
      if (!postElement) continue;
      // The await matters: without it the regex would be tested against a
      // Promise instead of the post text.
      const postContent = await page.evaluate(element => element.textContent, postElement);
      if (/\[test \d+\]/.test(postContent)) {
        postsToReturn.push(post); // instead of return true
      }
    } catch (e) {
      // Best effort: a post that cannot be inspected is logged and skipped.
      console.log(e);
    }
  }
  return postsToReturn;
}