如何拆分通过 querySelectorAll 从 HTML 中取得的句子
How to split a sentence rendered from HTML using querySelectorAll
/**
 * Opens the Facebook Marketplace car search page, collects the price and
 * title text of every listing card, and logs the resulting array.
 *
 * `sentence1` / `sentence2` are left as empty placeholders; they are meant to
 * receive the pieces of the listing title once it is split.
 *
 * NOTE(review): assumes `puppeteer` is already in scope (its import is not
 * visible in this snippet).
 */
(async () => {
  const url = "https://www.facebook.com/marketplace/nyc/search/?query=cars";
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });
    console.log("scraping...");
    // The callback below is executed inside the page, so it can only use
    // names that it defines itself.
    const data = await page.evaluate(() => {
      const cars = [];
      // querySelectorAll always returns a NodeList (possibly empty), so no
      // existence check is needed before iterating.
      document
        .querySelectorAll(
          'div[class="fome6x0j tkqzz1yd aodizinl fjf4s8hc f7vcsfb0"]'
        )
        .forEach((element) => {
          const prices = element.querySelectorAll(
            'span[class="d2edcug0 hpfvmrgz qv66sw1b"]'
          );
          const listings = element.querySelectorAll(
            'span[class="a8c37x1j ni8dbmo4 stjgntxs l9j0dhe7"]'
          );
          // Pair prices with titles by index; stop at the shorter list so a
          // missing price or title cannot throw.
          const count = Math.min(prices.length, listings.length);
          for (let i = 0; i < count; i++) {
            cars.push({
              price: prices[i].innerText,
              listing: listings[i].innerText,
              sentence1: '',
              sentence2: '',
            });
          }
        });
      return cars;
    });
    console.log(data);
  } catch (error) {
    console.log(error.message);
  } finally {
    // Always release the browser, even when navigation or scraping failed.
    if (browser) {
      await browser.close();
    }
  }
})();
如何拆分通过 listings 渲染出来的句子?listings 是 Facebook 上广告标题的列表。我试过使用 split() 函数,但它不起作用。有没有办法把这个句子拆分开,并分别存储到 sentence1 和 sentence2 中?
你可以试试这个。
/**
 * Opens the Facebook Marketplace car search page, reads the text fields of
 * every listing card, and logs one object per card.
 *
 * Each object has `price`, `listing`, `sentence1` and `sentence2`, taken from
 * the first four `aahdfvyu` elements of the card, in that order. A field whose
 * element is missing is reported as an empty string instead of aborting the
 * whole scrape.
 *
 * NOTE(review): assumes `puppeteer` is already in scope (its import is not
 * visible in this snippet).
 */
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    // The callback below is executed inside the page, so every helper it
    // needs is defined within it.
    const data = await page.evaluate(() => {
      // Cards whose whole text is one of these are not car listings.
      const ignoredTexts = ['', 'Create New Listing', 'Log In', 'Learn More'];
      // Text of nodes[index], or '' when that node does not exist.
      const textAt = (nodes, index) =>
        nodes[index] ? nodes[index].innerText : '';

      const cars = [];
      // getElementsByClassName always returns a collection (possibly empty),
      // so no existence check is needed before iterating.
      Array.from(document.getElementsByClassName('sonix8o1')).forEach((element) => {
        if (ignoredTexts.includes(element.innerText)) {
          return;
        }
        const carElements = element.getElementsByClassName('rq0escxv')[0];
        if (!carElements) {
          return; // card without the expected wrapper: nothing to read
        }
        const carInfo = carElements.getElementsByClassName('aahdfvyu');
        cars.push({
          price: textAt(carInfo, 0),
          listing: textAt(carInfo, 1),
          sentence1: textAt(carInfo, 2),
          sentence2: textAt(carInfo, 3),
        });
      });
      return cars;
    });
    data.forEach((car) => console.log(car));
  } catch (error) {
    console.log(error.message);
  } finally {
    // Always release the browser, even when navigation or scraping failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();
根据评论更新。
/**
 * Opens the Facebook Marketplace car search page, reads the price and title
 * of every listing card, and logs one object per card.
 *
 * The title is split on single spaces: `sentence1` is the first piece and
 * `sentence2` the second; any further pieces are not stored. Both default to
 * an empty string so every result object has the same shape.
 *
 * NOTE(review): assumes `puppeteer` is already in scope (its import is not
 * visible in this snippet).
 */
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    // The callback below is executed inside the page, so every helper it
    // needs is defined within it.
    const data = await page.evaluate(() => {
      // Cards whose whole text is one of these are not car listings.
      const ignoredTexts = ['', 'Create New Listing', 'Log In', 'Learn More'];
      // Text of nodes[index], or '' when that node does not exist.
      const textAt = (nodes, index) =>
        nodes[index] ? nodes[index].innerText : '';

      const cars = [];
      // getElementsByClassName always returns a collection (possibly empty),
      // so no existence check is needed before iterating.
      Array.from(document.getElementsByClassName('sonix8o1')).forEach((element) => {
        if (ignoredTexts.includes(element.innerText)) {
          return;
        }
        const carElements = element.getElementsByClassName('rq0escxv')[0];
        if (!carElements) {
          return; // card without the expected wrapper: nothing to read
        }
        const carInfo = carElements.getElementsByClassName('aahdfvyu');
        const listing = textAt(carInfo, 1);
        // Defaults cover titles that contain fewer than two pieces.
        const [sentence1 = '', sentence2 = ''] = listing.split(' ');
        cars.push({
          price: textAt(carInfo, 0),
          listing,
          sentence1,
          sentence2,
        });
      });
      return cars;
    });
    data.forEach((car) => console.log(car));
  } catch (error) {
    console.log(error.message);
  } finally {
    // Always release the browser, even when navigation or scraping failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();
它返回:
/**
 * Opens the Facebook Marketplace car search page, collects the price and
 * title text of every listing card, and logs the resulting array.
 *
 * `sentence1` / `sentence2` are left as empty placeholders; they are meant to
 * receive the pieces of the listing title once it is split.
 *
 * NOTE(review): assumes `puppeteer` is already in scope (its import is not
 * visible in this snippet).
 */
(async () => {
  const url = "https://www.facebook.com/marketplace/nyc/search/?query=cars";
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });
    console.log("scraping...");
    // The callback below is executed inside the page, so it can only use
    // names that it defines itself.
    const data = await page.evaluate(() => {
      const cars = [];
      // querySelectorAll always returns a NodeList (possibly empty), so no
      // existence check is needed before iterating.
      document
        .querySelectorAll(
          'div[class="fome6x0j tkqzz1yd aodizinl fjf4s8hc f7vcsfb0"]'
        )
        .forEach((element) => {
          const prices = element.querySelectorAll(
            'span[class="d2edcug0 hpfvmrgz qv66sw1b"]'
          );
          const listings = element.querySelectorAll(
            'span[class="a8c37x1j ni8dbmo4 stjgntxs l9j0dhe7"]'
          );
          // Pair prices with titles by index; stop at the shorter list so a
          // missing price or title cannot throw.
          const count = Math.min(prices.length, listings.length);
          for (let i = 0; i < count; i++) {
            cars.push({
              price: prices[i].innerText,
              listing: listings[i].innerText,
              sentence1: '',
              sentence2: '',
            });
          }
        });
      return cars;
    });
    console.log(data);
  } catch (error) {
    console.log(error.message);
  } finally {
    // Always release the browser, even when navigation or scraping failed.
    if (browser) {
      await browser.close();
    }
  }
})();
如何拆分通过 listings 渲染出来的句子?listings 是 Facebook 上广告标题的列表。我试过使用 split() 函数,但它不起作用。有没有办法把这个句子拆分开,并分别存储到 sentence1 和 sentence2 中?
你可以试试这个。
/**
 * Opens the Facebook Marketplace car search page, reads the text fields of
 * every listing card, and logs one object per card.
 *
 * Each object has `price`, `listing`, `sentence1` and `sentence2`, taken from
 * the first four `aahdfvyu` elements of the card, in that order. A field whose
 * element is missing is reported as an empty string instead of aborting the
 * whole scrape.
 *
 * NOTE(review): assumes `puppeteer` is already in scope (its import is not
 * visible in this snippet).
 */
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    // The callback below is executed inside the page, so every helper it
    // needs is defined within it.
    const data = await page.evaluate(() => {
      // Cards whose whole text is one of these are not car listings.
      const ignoredTexts = ['', 'Create New Listing', 'Log In', 'Learn More'];
      // Text of nodes[index], or '' when that node does not exist.
      const textAt = (nodes, index) =>
        nodes[index] ? nodes[index].innerText : '';

      const cars = [];
      // getElementsByClassName always returns a collection (possibly empty),
      // so no existence check is needed before iterating.
      Array.from(document.getElementsByClassName('sonix8o1')).forEach((element) => {
        if (ignoredTexts.includes(element.innerText)) {
          return;
        }
        const carElements = element.getElementsByClassName('rq0escxv')[0];
        if (!carElements) {
          return; // card without the expected wrapper: nothing to read
        }
        const carInfo = carElements.getElementsByClassName('aahdfvyu');
        cars.push({
          price: textAt(carInfo, 0),
          listing: textAt(carInfo, 1),
          sentence1: textAt(carInfo, 2),
          sentence2: textAt(carInfo, 3),
        });
      });
      return cars;
    });
    data.forEach((car) => console.log(car));
  } catch (error) {
    console.log(error.message);
  } finally {
    // Always release the browser, even when navigation or scraping failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();
根据评论更新。
/**
 * Opens the Facebook Marketplace car search page, reads the price and title
 * of every listing card, and logs one object per card.
 *
 * The title is split on single spaces: `sentence1` is the first piece and
 * `sentence2` the second; any further pieces are not stored. Both default to
 * an empty string so every result object has the same shape.
 *
 * NOTE(review): assumes `puppeteer` is already in scope (its import is not
 * visible in this snippet).
 */
(async () => {
  const url = 'https://www.facebook.com/marketplace/nyc/search/?query=cars';
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    // The callback below is executed inside the page, so every helper it
    // needs is defined within it.
    const data = await page.evaluate(() => {
      // Cards whose whole text is one of these are not car listings.
      const ignoredTexts = ['', 'Create New Listing', 'Log In', 'Learn More'];
      // Text of nodes[index], or '' when that node does not exist.
      const textAt = (nodes, index) =>
        nodes[index] ? nodes[index].innerText : '';

      const cars = [];
      // getElementsByClassName always returns a collection (possibly empty),
      // so no existence check is needed before iterating.
      Array.from(document.getElementsByClassName('sonix8o1')).forEach((element) => {
        if (ignoredTexts.includes(element.innerText)) {
          return;
        }
        const carElements = element.getElementsByClassName('rq0escxv')[0];
        if (!carElements) {
          return; // card without the expected wrapper: nothing to read
        }
        const carInfo = carElements.getElementsByClassName('aahdfvyu');
        const listing = textAt(carInfo, 1);
        // Defaults cover titles that contain fewer than two pieces.
        const [sentence1 = '', sentence2 = ''] = listing.split(' ');
        cars.push({
          price: textAt(carInfo, 0),
          listing,
          sentence1,
          sentence2,
        });
      });
      return cars;
    });
    data.forEach((car) => console.log(car));
  } catch (error) {
    console.log(error.message);
  } finally {
    // Always release the browser, even when navigation or scraping failed.
    if (browser) {
      await browser.close();
      console.log('closing browser');
    }
  }
})();
它返回: