在实际 API 上使用 puppeteer
Using puppeteer on an actual API
很抱歉问了这么长的问题,我是 node 的新手,但我之前已经用身份验证和所有内容制作了 CRUD API,我只需要了解如何将 puppeteer 集成到 API,让我开始吧:
这是我的项目结构:
傀儡师-api
- 控制器 - puppeteer.controller.js
- 路线 - puppeteer.routes.js
- index.js
这是我的 index.js 文件:
const puppeteer = require('puppeteer');
const express = require('express');
const booking = require('./routes/puppeteer.routes')
const app = express();
app.use('/booking', booking);
let port = 8080;
app.listen(port, () => {
console.log('Server is running on https://localhost:8080/');
});
puppeteer.routes.js:
const express = require('express');
const router = express.Router();
const puppeteer_controller = require('../controllers/puppeteer.controller');
router.get('/', puppeteer_controller.get_booking);
module.exports = router;
puppeteer.controller.js:
const puppeteer = require('puppeteer');
exports.get_booking = (req, res, next) => {
res.json = (async function main() {
try {
const browser = await puppeteer.launch({ headless: true});
const page = await browser.newPage();
await page.goto('https://www.booking.com/searchresults.es-ar.html?label=gen173nr-1DCAEoggI46AdIM1gEaAyIAQGYASy4ARfIAQzYAQPoAQGIAgGoAgM&lang=es-ar&sid=bc11c3e819d105b3c501d0c7a501c718&sb=1&src=index&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Findex.es-ar.html%3Flabel%3Dgen173nr-1DCAEoggI46AdIM1gEaAyIAQGYASy4ARfIAQzYAQPoAQGIAgGoAgM%3Bsid%3Dbc11c3e819d105b3c501d0c7a501c718%3Bsb_price_type%3Dtotal%26%3B&ss=El+Bols%C3%B3n%2C+R%C3%ADo+Negro%2C+Argentina&is_ski_area=&checkin_year=&checkin_month=&checkout_year=&checkout_month=&no_rooms=1&group_adults=2&group_children=0&b_h4u_keep_filters=&from_sf=1&ss_raw=el+bols&ac_position=0&ac_langcode=es&ac_click_type=b&dest_id=-985282&dest_type=city&place_id_lat=-41.964452&place_id_lon=-71.532732&search_pageview_id=06d48fb6823e00e9&search_selected=true&search_pageview_id=06d48fb6823e00e9&ac_suggestion_list_length=5&ac_suggestion_theme_list_length=0');
await page.waitForSelector('.sr_item');
page.on('console', consoleObj => console.log(consoleObj.text()));
console.log('Retrieving hotels data');
const hoteles = page.evaluate(() => {
let hoteles = [];
let x = document.getElementsByClassName('sr_item');
hoteles.push(x);
let navigation = document.getElementsByClassName('sr_pagination_item');
for (const nav of navigation) {
nav.click();
hoteles.push(document.getElementsByClassName('sr_item'));
}
console.log('Finished looping through');
return hoteles;
});
} catch(e) {
console.log('error', e);
}
})();
};
所以,我想要的是能够从我的应用程序发送一个 GET 请求,并从我的 API 那里得到一个响应,其中包含预订的酒店列表,这只是一个个人项目,事情是,使用 Postman 我正在发送 GET 请求,但我根本没有得到任何回应,所以我想知道我做错了什么以及应该遵循什么方向,如果有人能够指出我正确的方向我会是非常感谢。
- 块
({})()
立即运行您的代码,而不是根据请求运行。
- res.json 是一个函数,你不应该重新分配它。
相反,将函数移动到其他地方并像下面这样调用它,
async function scraper() {
try {
const browser = await puppeteer.launch({ headless: true});
const page = await browser.newPage();
await page.goto('https://www.booking.com/searchresults.es-ar.html?label=gen173nr-1DCAEoggI46AdIM1gEaAyIAQGYASy4ARfIAQzYAQPoAQGIAgGoAgM&lang=es-ar&sid=bc11c3e819d105b3c501d0c7a501c718&sb=1&src=index&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Findex.es-ar.html%3Flabel%3Dgen173nr-1DCAEoggI46AdIM1gEaAyIAQGYASy4ARfIAQzYAQPoAQGIAgGoAgM%3Bsid%3Dbc11c3e819d105b3c501d0c7a501c718%3Bsb_price_type%3Dtotal%26%3B&ss=El+Bols%C3%B3n%2C+R%C3%ADo+Negro%2C+Argentina&is_ski_area=&checkin_year=&checkin_month=&checkout_year=&checkout_month=&no_rooms=1&group_adults=2&group_children=0&b_h4u_keep_filters=&from_sf=1&ss_raw=el+bols&ac_position=0&ac_langcode=es&ac_click_type=b&dest_id=-985282&dest_type=city&place_id_lat=-41.964452&place_id_lon=-71.532732&search_pageview_id=06d48fb6823e00e9&search_selected=true&search_pageview_id=06d48fb6823e00e9&ac_suggestion_list_length=5&ac_suggestion_theme_list_length=0');
await page.waitForSelector('.sr_item');
page.on('console', consoleObj => console.log(consoleObj.text()));
console.log('Retrieving hotels data');
const hoteles = page.evaluate(() => {
let hoteles = [];
let x = document.getElementsByClassName('sr_item');
hoteles.push(x);
let navigation = document.getElementsByClassName('sr_pagination_item');
for (const nav of navigation) {
nav.click();
hoteles.push(document.getElementsByClassName('sr_item'));
}
console.log('Finished looping through');
return hoteles;
});
} catch(e) {
console.log('error', e);
}
}
// Call the scraper
exports.get_booking = async (req, res, next) => {
const scraperData = await scraper();
res.json(scraperData)
}
它将使控制器变成一个承诺和 return JSON 数据。
很抱歉问了这么长的问题,我是 node 的新手,但我之前已经用身份验证和所有内容制作了 CRUD API,我只需要了解如何将 puppeteer 集成到 API,让我开始吧:
这是我的项目结构:
傀儡师-api
- 控制器 - puppeteer.controller.js
- 路线 - puppeteer.routes.js
- index.js
这是我的 index.js 文件:
const puppeteer = require('puppeteer');
const express = require('express');
const booking = require('./routes/puppeteer.routes')
const app = express();
app.use('/booking', booking);
let port = 8080;
app.listen(port, () => {
console.log('Server is running on https://localhost:8080/');
});
puppeteer.routes.js:
const express = require('express');
const router = express.Router();
const puppeteer_controller = require('../controllers/puppeteer.controller');
router.get('/', puppeteer_controller.get_booking);
module.exports = router;
puppeteer.controller.js:
const puppeteer = require('puppeteer');
exports.get_booking = (req, res, next) => {
res.json = (async function main() {
try {
const browser = await puppeteer.launch({ headless: true});
const page = await browser.newPage();
await page.goto('https://www.booking.com/searchresults.es-ar.html?label=gen173nr-1DCAEoggI46AdIM1gEaAyIAQGYASy4ARfIAQzYAQPoAQGIAgGoAgM&lang=es-ar&sid=bc11c3e819d105b3c501d0c7a501c718&sb=1&src=index&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Findex.es-ar.html%3Flabel%3Dgen173nr-1DCAEoggI46AdIM1gEaAyIAQGYASy4ARfIAQzYAQPoAQGIAgGoAgM%3Bsid%3Dbc11c3e819d105b3c501d0c7a501c718%3Bsb_price_type%3Dtotal%26%3B&ss=El+Bols%C3%B3n%2C+R%C3%ADo+Negro%2C+Argentina&is_ski_area=&checkin_year=&checkin_month=&checkout_year=&checkout_month=&no_rooms=1&group_adults=2&group_children=0&b_h4u_keep_filters=&from_sf=1&ss_raw=el+bols&ac_position=0&ac_langcode=es&ac_click_type=b&dest_id=-985282&dest_type=city&place_id_lat=-41.964452&place_id_lon=-71.532732&search_pageview_id=06d48fb6823e00e9&search_selected=true&search_pageview_id=06d48fb6823e00e9&ac_suggestion_list_length=5&ac_suggestion_theme_list_length=0');
await page.waitForSelector('.sr_item');
page.on('console', consoleObj => console.log(consoleObj.text()));
console.log('Retrieving hotels data');
const hoteles = page.evaluate(() => {
let hoteles = [];
let x = document.getElementsByClassName('sr_item');
hoteles.push(x);
let navigation = document.getElementsByClassName('sr_pagination_item');
for (const nav of navigation) {
nav.click();
hoteles.push(document.getElementsByClassName('sr_item'));
}
console.log('Finished looping through');
return hoteles;
});
} catch(e) {
console.log('error', e);
}
})();
};
所以,我想要的是能够从我的应用程序发送一个 GET 请求,并从我的 API 那里得到一个响应,其中包含预订的酒店列表,这只是一个个人项目,事情是,使用 Postman 我正在发送 GET 请求,但我根本没有得到任何回应,所以我想知道我做错了什么以及应该遵循什么方向,如果有人能够指出我正确的方向我会是非常感谢。
- 块
({})()
立即运行您的代码,而不是根据请求运行。 - res.json 是一个函数,你不应该重新分配它。
相反,将函数移动到其他地方并像下面这样调用它,
async function scraper() {
try {
const browser = await puppeteer.launch({ headless: true});
const page = await browser.newPage();
await page.goto('https://www.booking.com/searchresults.es-ar.html?label=gen173nr-1DCAEoggI46AdIM1gEaAyIAQGYASy4ARfIAQzYAQPoAQGIAgGoAgM&lang=es-ar&sid=bc11c3e819d105b3c501d0c7a501c718&sb=1&src=index&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Findex.es-ar.html%3Flabel%3Dgen173nr-1DCAEoggI46AdIM1gEaAyIAQGYASy4ARfIAQzYAQPoAQGIAgGoAgM%3Bsid%3Dbc11c3e819d105b3c501d0c7a501c718%3Bsb_price_type%3Dtotal%26%3B&ss=El+Bols%C3%B3n%2C+R%C3%ADo+Negro%2C+Argentina&is_ski_area=&checkin_year=&checkin_month=&checkout_year=&checkout_month=&no_rooms=1&group_adults=2&group_children=0&b_h4u_keep_filters=&from_sf=1&ss_raw=el+bols&ac_position=0&ac_langcode=es&ac_click_type=b&dest_id=-985282&dest_type=city&place_id_lat=-41.964452&place_id_lon=-71.532732&search_pageview_id=06d48fb6823e00e9&search_selected=true&search_pageview_id=06d48fb6823e00e9&ac_suggestion_list_length=5&ac_suggestion_theme_list_length=0');
await page.waitForSelector('.sr_item');
page.on('console', consoleObj => console.log(consoleObj.text()));
console.log('Retrieving hotels data');
const hoteles = page.evaluate(() => {
let hoteles = [];
let x = document.getElementsByClassName('sr_item');
hoteles.push(x);
let navigation = document.getElementsByClassName('sr_pagination_item');
for (const nav of navigation) {
nav.click();
hoteles.push(document.getElementsByClassName('sr_item'));
}
console.log('Finished looping through');
return hoteles;
});
} catch(e) {
console.log('error', e);
}
}
// Call the scraper
exports.get_booking = async (req, res, next) => {
const scraperData = await scraper();
res.json(scraperData)
}
它将使控制器变成一个承诺和 return JSON 数据。