无法抓取 Facebook 市场中的所有广告
Can't scrape all ads in Facebook Marketplace
我正在尝试抓取 Facebook Marketplace(市场),但只抓取到了第一条广告。有人能建议一种抓取整个广告列表的方法吗?
代码
// Opens the Marketplace search page in a visible browser window, waits, then
// collects the text of one matching element per listing container and logs it.
// Relies on `puppeteer` and a `delay(ms)` helper being in scope; neither is
// defined in this snippet.
(async () => {
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
    await page.goto(url, {
      waitUntil: 'networkidle2',
    });
    // NOTE(review): scrolling is disabled here; presumably more listings only
    // load after scrolling the page — confirm before re-enabling.
    //await autoScroll(page);
    console.log('scraping...')
    await delay(4000);
    const data = await page.evaluate(() => {
      const carData = document.querySelectorAll('div [class = "fome6x0j tkqzz1yd aodizinl fjf4s8hc f7vcsfb0"]');
      const cars = [];
      carData.forEach((element) => {
        // querySelector yields only the FIRST match under each container, so
        // this collects at most one entry per container.
        const add = element.querySelector('div [class = "sonix8o1"]');
        // A container without a matching child gives null; skip it instead of
        // throwing on `null.innerText` and losing every result.
        if (add !== null) {
          cars.push(add.innerText);
        }
      });
      return cars;
    });
    console.log(data);
  } finally {
    // Always release the browser, even when navigation or evaluation fails.
    if (browser) {
      await browser.close();
    }
  }
})();
这是我得到的输出,我想要实现的是单独访问每个数据
你可以试试这个。
// Loads the Marketplace search page and logs the innerText of every element
// whose class attribute is exactly "sonix8o1". Errors are logged by message and
// the browser is always closed. Relies on `puppeteer` being in scope.
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    const data = await page.evaluate(() => {
      // querySelectorAll always returns a NodeList (possibly empty), never
      // null, so no existence check is needed and one query is enough.
      const elements = document.querySelectorAll('div[class="sonix8o1"]');
      return Array.from(elements, (element) => element.innerText);
    });
    console.log(data);
  } catch (error) {
    console.log(error.message);
  } finally {
    // `browser` stays undefined if launch itself failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();
或者你也可以这样试试
// Alternative to the snippet above: same page load and selector, but the
// matching elements are walked with an explicit loop. Errors are logged by
// message and the browser is always closed. Relies on `puppeteer` being in scope.
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    const data = await page.evaluate(() => {
      const cars = [];
      // The NodeList from querySelectorAll is never null (it is empty when
      // nothing matches), so it can be iterated directly after a single query.
      const elements = document.querySelectorAll('div[class="sonix8o1"]');
      for (const element of elements) {
        cars.push(element.innerText);
      }
      return cars;
    });
    console.log(data);
  } catch (error) {
    console.log(error.message);
  } finally {
    // `browser` stays undefined if launch itself failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();
代码
// Opens the Marketplace search page in a visible browser window, waits, then
// collects the text of one matching element per listing container and logs it.
// Relies on `puppeteer` and a `delay(ms)` helper being in scope; neither is
// defined in this snippet.
(async () => {
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
    await page.goto(url, {
      waitUntil: 'networkidle2',
    });
    // NOTE(review): scrolling is disabled here; presumably more listings only
    // load after scrolling the page — confirm before re-enabling.
    //await autoScroll(page);
    console.log('scraping...')
    await delay(4000);
    const data = await page.evaluate(() => {
      const carData = document.querySelectorAll('div [class = "fome6x0j tkqzz1yd aodizinl fjf4s8hc f7vcsfb0"]');
      const cars = [];
      carData.forEach((element) => {
        // querySelector yields only the FIRST match under each container, so
        // this collects at most one entry per container.
        const add = element.querySelector('div [class = "sonix8o1"]');
        // A container without a matching child gives null; skip it instead of
        // throwing on `null.innerText` and losing every result.
        if (add !== null) {
          cars.push(add.innerText);
        }
      });
      return cars;
    });
    console.log(data);
  } finally {
    // Always release the browser, even when navigation or evaluation fails.
    if (browser) {
      await browser.close();
    }
  }
})();
这是我得到的输出,我想要实现的是单独访问每个数据
你可以试试这个。
// Loads the Marketplace search page and logs the innerText of every element
// whose class attribute is exactly "sonix8o1". Errors are logged by message and
// the browser is always closed. Relies on `puppeteer` being in scope.
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    const data = await page.evaluate(() => {
      // querySelectorAll always returns a NodeList (possibly empty), never
      // null, so no existence check is needed and one query is enough.
      const elements = document.querySelectorAll('div[class="sonix8o1"]');
      return Array.from(elements, (element) => element.innerText);
    });
    console.log(data);
  } catch (error) {
    console.log(error.message);
  } finally {
    // `browser` stays undefined if launch itself failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();
或者你也可以这样试试
// Alternative to the snippet above: same page load and selector, but the
// matching elements are walked with an explicit loop. Errors are logged by
// message and the browser is always closed. Relies on `puppeteer` being in scope.
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    const data = await page.evaluate(() => {
      const cars = [];
      // The NodeList from querySelectorAll is never null (it is empty when
      // nothing matches), so it can be iterated directly after a single query.
      const elements = document.querySelectorAll('div[class="sonix8o1"]');
      for (const element of elements) {
        cars.push(element.innerText);
      }
      return cars;
    });
    console.log(data);
  } catch (error) {
    console.log(error.message);
  } finally {
    // `browser` stays undefined if launch itself failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();