Puppeteer:将 for 循环中的数据保存到数据库
Puppeteer: Save data from a for loop in database
我正在抓取网站并设法在 for 循环中提取数据。
但是,我不知道如何将它保存到我的 MongoDB 数据库中,因为我收到了错误消息 ReferenceError: nameElement is not defined。
如何将 for 循环的结果作为对象保存到数据库中?
// Module-level array; the function below pushes the document it builds into
// this array and returns it.
const kclResults = [];
/**
 * Question code as posted: reads contact fields inside a `page.evaluate`
 * callback, then tries to build one `KingsDB` document from them and save it.
 *
 * NOTE(review): this is the version that fails with
 * `ReferenceError: nameElement is not defined`. The seven field variables are
 * declared with `const` inside the `for` body of the callback, but they are
 * referenced again after the callback has ended, where they are out of scope.
 * The error is thrown inside the `try`, so the `catch` logs it and the
 * function resolves to `undefined`.
 *
 * @param page - object providing `content()` and `evaluate()`; presumably a
 *   Puppeteer Page — confirm against the caller.
 * @param scrollDelay - defaults to 10000; not read anywhere in this function.
 * @returns the module-level `kclResults` array, or `undefined` when an error
 *   was caught.
 */
async function scrapeInfiniteScrollItems(
page,
scrollDelay = 10000
) {
try {
const html = await page.content();
const $ = cheerio.load(html);
// NOTE(review): the callback is handed to `page.evaluate`; presumably it runs
// in the browser rather than in Node, in which case the `$` used inside it is
// not the cheerio instance created above — confirm.
await page.evaluate(() => {
let elements = $("[role='listitem']")
.find("._2DX0iPG8PDF3Si_o5PlzIj")
.toArray();
// `i` is assigned here without any let/const/var declaration.
for (i = 0; i < elements.length; i++) {
$(elements[i]).click();
// These constants are scoped to a single loop iteration; nothing is returned
// from the callback, so the values are only logged.
const nameElement = $("[data-log-name='PersonName']").text();
const emailElement = $("[data-log-name='Email']").text();
const allElements = $("[aria-label='Contact information']").text();
const officeLocation = $("[data-log-name='OfficeLocation']").text();
const position = $("[data-log-name='Company']").text();
const jobTitle = $("[data-log-name='JobTitle']").text();
const departament = $("[data-log-name='Department']").text();
console.log(
`email: ${emailElement} name: ${nameElement} allElements: ${allElements} \n office location: ${officeLocation} \n position: ${position} \n jobTitle: ${jobTitle} \n departament: ${departament}`
);
}
});
// The identifiers below are not declared in this scope; this is where the
// ReferenceError is raised.
let kclResult = new KingsDB({
nameElement,
emailElement,
allElements,
officeLocation,
position,
jobTitle,
departament,
});
kclResults.push(kclResult);
console.log(kclResults);
// The value returned by save() is neither awaited nor otherwise used.
kclResult.save();
return kclResults;
} catch (error) {
// Errors are logged only; the function then falls through without a return value.
console.log(error);
}
}
您在 for 循环的作用域内声明了 nameElement(以及其他变量),却试图在该作用域之外访问它们。
只需在循环中把每次提取到的结果放入一个数组并将其返回,然后在写入数据库时遍历该数组即可。下面的代码应该可以工作:
// Accumulates every document saved by scrapeInfiniteScrollItems. It lives at
// module level, so entries from earlier calls are kept.
const kclResults = [];

/**
 * Clicks every contact entry on the page, reads the contact details shown
 * for it, and saves each contact to the database as a `KingsDB` document.
 *
 * `KingsDB` is assumed to be a Mongoose-style model whose `save()` returns a
 * promise — confirm against the model definition.
 *
 * @param {object} page - Puppeteer page; only `page.evaluate` is used.
 * @param {number} [scrollDelay=10000] - Currently unused; kept so existing
 *   callers that pass it keep working.
 * @returns {Promise<Array|undefined>} The module-level `kclResults` array
 *   (saved documents only), or `undefined` if an error was caught and logged.
 */
async function scrapeInfiniteScrollItems(
  page,
  scrollDelay = 10000
) {
  try {
    // The callback runs in the browser, not in Node, so it cannot see Node-side
    // variables (such as a cheerio `$`). It therefore uses only DOM APIs and
    // returns plain serializable objects.
    const scrapedContacts = await page.evaluate(() => {
      // Concatenated text of every element matching the selector.
      const textOf = (selector) =>
        Array.from(
          document.querySelectorAll(selector),
          (node) => node.textContent
        ).join("");

      const entries = document.querySelectorAll(
        "[role='listitem'] ._2DX0iPG8PDF3Si_o5PlzIj"
      );
      const contacts = [];
      for (const entry of entries) {
        // Clicking an entry makes the page show that contact's details.
        entry.click();
        const nameElement = textOf("[data-log-name='PersonName']");
        const emailElement = textOf("[data-log-name='Email']");
        const allElements = textOf("[aria-label='Contact information']");
        const officeLocation = textOf("[data-log-name='OfficeLocation']");
        const position = textOf("[data-log-name='Company']");
        const jobTitle = textOf("[data-log-name='JobTitle']");
        const departament = textOf("[data-log-name='Department']");
        contacts.push({
          nameElement,
          emailElement,
          allElements,
          officeLocation,
          position,
          jobTitle,
          departament,
        });
        console.log(
          `email: ${emailElement} name: ${nameElement} allElements: ${allElements} \n office location: ${officeLocation} \n position: ${position} \n jobTitle: ${jobTitle} \n departament: ${departament}`
        );
      }
      return contacts;
    });

    for (const contact of scrapedContacts) {
      const {
        nameElement,
        emailElement,
        allElements,
        officeLocation,
        position,
        jobTitle,
        departament,
      } = contact;
      const kclResult = new KingsDB({
        nameElement,
        emailElement,
        allElements,
        officeLocation,
        position,
        jobTitle,
        departament,
      });
      // Wait for the write so failures reach the catch below, and record the
      // document only once it has actually been saved (never the promise).
      await kclResult.save();
      kclResults.push(kclResult);
      console.log(kclResults);
    }
    return kclResults;
  } catch (error) {
    console.log(error);
  }
}
PS:传递给 page.evaluate 的函数在浏览器上下文中运行,因此无法访问您的 Node 变量,除非将它们作为参数显式传入。
我正在抓取网站并设法在 for 循环中提取数据。
但是,我不知道如何将它保存到我的 MongoDB 数据库中,因为我收到了错误消息 ReferenceError: nameElement is not defined。
如何将 for 循环的结果作为对象保存到数据库中?
// Module-level array; the function below pushes the document it builds into
// this array and returns it.
const kclResults = [];
/**
 * Question code as posted: reads contact fields inside a `page.evaluate`
 * callback, then tries to build one `KingsDB` document from them and save it.
 *
 * NOTE(review): this is the version that fails with
 * `ReferenceError: nameElement is not defined`. The seven field variables are
 * declared with `const` inside the `for` body of the callback, but they are
 * referenced again after the callback has ended, where they are out of scope.
 * The error is thrown inside the `try`, so the `catch` logs it and the
 * function resolves to `undefined`.
 *
 * @param page - object providing `content()` and `evaluate()`; presumably a
 *   Puppeteer Page — confirm against the caller.
 * @param scrollDelay - defaults to 10000; not read anywhere in this function.
 * @returns the module-level `kclResults` array, or `undefined` when an error
 *   was caught.
 */
async function scrapeInfiniteScrollItems(
page,
scrollDelay = 10000
) {
try {
const html = await page.content();
const $ = cheerio.load(html);
// NOTE(review): the callback is handed to `page.evaluate`; presumably it runs
// in the browser rather than in Node, in which case the `$` used inside it is
// not the cheerio instance created above — confirm.
await page.evaluate(() => {
let elements = $("[role='listitem']")
.find("._2DX0iPG8PDF3Si_o5PlzIj")
.toArray();
// `i` is assigned here without any let/const/var declaration.
for (i = 0; i < elements.length; i++) {
$(elements[i]).click();
// These constants are scoped to a single loop iteration; nothing is returned
// from the callback, so the values are only logged.
const nameElement = $("[data-log-name='PersonName']").text();
const emailElement = $("[data-log-name='Email']").text();
const allElements = $("[aria-label='Contact information']").text();
const officeLocation = $("[data-log-name='OfficeLocation']").text();
const position = $("[data-log-name='Company']").text();
const jobTitle = $("[data-log-name='JobTitle']").text();
const departament = $("[data-log-name='Department']").text();
console.log(
`email: ${emailElement} name: ${nameElement} allElements: ${allElements} \n office location: ${officeLocation} \n position: ${position} \n jobTitle: ${jobTitle} \n departament: ${departament}`
);
}
});
// The identifiers below are not declared in this scope; this is where the
// ReferenceError is raised.
let kclResult = new KingsDB({
nameElement,
emailElement,
allElements,
officeLocation,
position,
jobTitle,
departament,
});
kclResults.push(kclResult);
console.log(kclResults);
// The value returned by save() is neither awaited nor otherwise used.
kclResult.save();
return kclResults;
} catch (error) {
// Errors are logged only; the function then falls through without a return value.
console.log(error);
}
}
您在 for 循环的作用域内声明了 nameElement(以及其他变量),却试图在该作用域之外访问它们。
只需在循环中把每次提取到的结果放入一个数组并将其返回,然后在写入数据库时遍历该数组即可。下面的代码应该可以工作:
// Accumulates every document saved by scrapeInfiniteScrollItems. It lives at
// module level, so entries from earlier calls are kept.
const kclResults = [];

/**
 * Clicks every contact entry on the page, reads the contact details shown
 * for it, and saves each contact to the database as a `KingsDB` document.
 *
 * `KingsDB` is assumed to be a Mongoose-style model whose `save()` returns a
 * promise — confirm against the model definition.
 *
 * @param {object} page - Puppeteer page; only `page.evaluate` is used.
 * @param {number} [scrollDelay=10000] - Currently unused; kept so existing
 *   callers that pass it keep working.
 * @returns {Promise<Array|undefined>} The module-level `kclResults` array
 *   (saved documents only), or `undefined` if an error was caught and logged.
 */
async function scrapeInfiniteScrollItems(
  page,
  scrollDelay = 10000
) {
  try {
    // The callback runs in the browser, not in Node, so it cannot see Node-side
    // variables (such as a cheerio `$`). It therefore uses only DOM APIs and
    // returns plain serializable objects.
    const scrapedContacts = await page.evaluate(() => {
      // Concatenated text of every element matching the selector.
      const textOf = (selector) =>
        Array.from(
          document.querySelectorAll(selector),
          (node) => node.textContent
        ).join("");

      const entries = document.querySelectorAll(
        "[role='listitem'] ._2DX0iPG8PDF3Si_o5PlzIj"
      );
      const contacts = [];
      for (const entry of entries) {
        // Clicking an entry makes the page show that contact's details.
        entry.click();
        const nameElement = textOf("[data-log-name='PersonName']");
        const emailElement = textOf("[data-log-name='Email']");
        const allElements = textOf("[aria-label='Contact information']");
        const officeLocation = textOf("[data-log-name='OfficeLocation']");
        const position = textOf("[data-log-name='Company']");
        const jobTitle = textOf("[data-log-name='JobTitle']");
        const departament = textOf("[data-log-name='Department']");
        contacts.push({
          nameElement,
          emailElement,
          allElements,
          officeLocation,
          position,
          jobTitle,
          departament,
        });
        console.log(
          `email: ${emailElement} name: ${nameElement} allElements: ${allElements} \n office location: ${officeLocation} \n position: ${position} \n jobTitle: ${jobTitle} \n departament: ${departament}`
        );
      }
      return contacts;
    });

    for (const contact of scrapedContacts) {
      const {
        nameElement,
        emailElement,
        allElements,
        officeLocation,
        position,
        jobTitle,
        departament,
      } = contact;
      const kclResult = new KingsDB({
        nameElement,
        emailElement,
        allElements,
        officeLocation,
        position,
        jobTitle,
        departament,
      });
      // Wait for the write so failures reach the catch below, and record the
      // document only once it has actually been saved (never the promise).
      await kclResult.save();
      kclResults.push(kclResult);
      console.log(kclResults);
    }
    return kclResults;
  } catch (error) {
    console.log(error);
  }
}
PS:传递给 page.evaluate 的函数在浏览器上下文中运行,因此无法访问您的 Node 变量,除非将它们作为参数显式传入。