循环浏览页面以 return 所有产品链接
Loop through pages to return all the product links
目标是获取分页所有页面的所有产品链接。到目前为止,我已经设法使用 console.log (链接)将信息打印到控制台。但是,由于我是这个领域的新手并且完全没有经验,所以我遇到了如何使用 return 命令传回一些值的问题。 return links
.
使用 console.log(links)
我收到警告:getLinks 不可迭代
const puppeteer = require('puppeteer')
async function getLinks(){
const browser = await puppeteer.launch({headless: false, defaultViewport: null});
const page = await browser.newPage();
const url = "https://example.com/product-category?p=1&nidx"
await page.goto(url)
while(await page.$('.change-country-buttons > button:nth-child(1)')){
await page.waitForTimeout(2000);
await page.keyboard.press('ArrowDown');
await page.waitForSelector('.change-country-buttons');
await page.waitForTimeout(2000);
await page.click('.change-country-buttons > button:nth-child(1)');
await page.waitForTimeout(2000);
}
while(await page.$(".pagination .pagination--next")){
await page.waitForTimeout(2000);
await page.evaluate(() => {
document.querySelector(".pagination .pagination--next").scrollIntoView();
});
await page.waitForTimeout(1000);
await page.waitForSelector(".pagination .pagination--next")
await page.waitForTimeout(500);
await page.click('.pagination .pagination--next')
const links = await page.$$eval('.item__info > .mtc-link:nth-child(2)', (allAs) => { return allAs.map((a) => a.href) });
await page.waitForTimeout(1500);
console.log(links)
}
}
return links // Is returning links only form the first page and then the loop stops
我用 Promise.all () 尝试了一些东西,但我并不完全清楚如何去做。
请帮助和温柔,因为我刚刚开始学习基础知识
您需要创建一个数组并将每个页面的所有头盔链接推送到它。
这对我来说测试成功了。
const puppeteer = require('puppeteer')
async function getLinks(){
const browser = await puppeteer.launch({headless: false, defaultViewport: null});
const page = await browser.newPage();
const url = "https://www.motocard.com/en/motorcycle-road-gear/helmets/precio_150-3200/full-face?p=1&nidx"
var all_links = [];
await page.goto(url);
while(await page.$('.change-country-buttons > button:nth-child(1)')){
await page.waitForTimeout(2000);
await page.keyboard.press('ArrowDown');
await page.waitForSelector('.change-country-buttons');
await page.waitForTimeout(2000);
await page.click('.change-country-buttons > button:nth-child(1)');
await page.waitForTimeout(2000);
}
while(await page.$(".pagination .pagination--next")){
await page.waitForTimeout(2000);
await page.evaluate(() => {
document.querySelector(".pagination .pagination--next").scrollIntoView();
});
await page.waitForTimeout(1000);
await page.waitForSelector(".pagination .pagination--next")
await page.waitForTimeout(500);
await page.click('.pagination .pagination--next')
const links = await page.$$eval('.item__info > .mtc-link:nth-child(2)', (allAs) => { return allAs.map((a) => a.href) });
await page.waitForTimeout(1500);
//console.log(links)
all_links.push(...links);
}
return all_links;
}
(async ()=>{
var links = await getLinks();
console.log('done');
console.log(links);
})();
目标是获取分页所有页面的所有产品链接。到目前为止,我已经设法使用 console.log (链接)将信息打印到控制台。但是,由于我是这个领域的新手并且完全没有经验,所以我遇到了如何使用 return 命令传回一些值的问题。 return links
.
使用 console.log(links)
我收到警告:getLinks 不可迭代
const puppeteer = require('puppeteer')
async function getLinks(){
const browser = await puppeteer.launch({headless: false, defaultViewport: null});
const page = await browser.newPage();
const url = "https://example.com/product-category?p=1&nidx"
await page.goto(url)
while(await page.$('.change-country-buttons > button:nth-child(1)')){
await page.waitForTimeout(2000);
await page.keyboard.press('ArrowDown');
await page.waitForSelector('.change-country-buttons');
await page.waitForTimeout(2000);
await page.click('.change-country-buttons > button:nth-child(1)');
await page.waitForTimeout(2000);
}
while(await page.$(".pagination .pagination--next")){
await page.waitForTimeout(2000);
await page.evaluate(() => {
document.querySelector(".pagination .pagination--next").scrollIntoView();
});
await page.waitForTimeout(1000);
await page.waitForSelector(".pagination .pagination--next")
await page.waitForTimeout(500);
await page.click('.pagination .pagination--next')
const links = await page.$$eval('.item__info > .mtc-link:nth-child(2)', (allAs) => { return allAs.map((a) => a.href) });
await page.waitForTimeout(1500);
console.log(links)
}
}
return links // Is returning links only form the first page and then the loop stops
我用 Promise.all () 尝试了一些东西,但我并不完全清楚如何去做。
请帮助和温柔,因为我刚刚开始学习基础知识
您需要创建一个数组并将每个页面的所有头盔链接推送到它。
这对我来说测试成功了。
const puppeteer = require('puppeteer')
async function getLinks(){
const browser = await puppeteer.launch({headless: false, defaultViewport: null});
const page = await browser.newPage();
const url = "https://www.motocard.com/en/motorcycle-road-gear/helmets/precio_150-3200/full-face?p=1&nidx"
var all_links = [];
await page.goto(url);
while(await page.$('.change-country-buttons > button:nth-child(1)')){
await page.waitForTimeout(2000);
await page.keyboard.press('ArrowDown');
await page.waitForSelector('.change-country-buttons');
await page.waitForTimeout(2000);
await page.click('.change-country-buttons > button:nth-child(1)');
await page.waitForTimeout(2000);
}
while(await page.$(".pagination .pagination--next")){
await page.waitForTimeout(2000);
await page.evaluate(() => {
document.querySelector(".pagination .pagination--next").scrollIntoView();
});
await page.waitForTimeout(1000);
await page.waitForSelector(".pagination .pagination--next")
await page.waitForTimeout(500);
await page.click('.pagination .pagination--next')
const links = await page.$$eval('.item__info > .mtc-link:nth-child(2)', (allAs) => { return allAs.map((a) => a.href) });
await page.waitForTimeout(1500);
//console.log(links)
all_links.push(...links);
}
return all_links;
}
(async ()=>{
var links = await getLinks();
console.log('done');
console.log(links);
})();