scrapy 分页在 tripadvisor 上不起作用

scrapy pagination not working on tripadvisor

我正在尝试抓取 tripadvisor 上的餐厅页面(只是为了了解它是如何工作的)。但是,我只能得到第一页。我遗漏了什么?这是代码,谢谢!

import scrapy

class TripadvSpider(scrapy.Spider):
    """Spider that yields restaurant names and links from a TripAdvisor listing.

    NOTE(review): this is the version that only scrapes the first page. The
    next-page expression in ``parse`` calls ``.extract()`` on a plain string,
    so it raises after the first page's items have been yielded and no
    follow-up request is ever scheduled.
    """
    name = 'tripadv'

    allowed_domains = ['tripadvisor.com']
    start_urls = ['https://www.tripadvisor.com/Restaurants-g60795-oa0-Philadelphia_Pennsylvania.html#EATERY_LIST_CONTENTS']

    def parse(self, response):
        """Yield one dict per ``div.emrzT`` element, then try to follow the next page."""

        # Presumably each div.emrzT is one restaurant card; the class names look
        # auto-generated, so verify them against the live markup.
        for stores in response.css('div.emrzT'):
            yield {
            # .extract() returns a list of every match, so both values are lists.
            'name' : stores.css('a.bHGqj::text').extract(),
            'link' : stores.css('a.bHGqj').xpath("@href").extract()
            }
        # BUG: 'http://tripadvisor.com' + attrib['href'] is already a str, and
        # str has no .extract() method, so this line raises AttributeError.
        # If 'a.nav' matches nothing, the ['href'] lookup presumably fails even
        # earlier with KeyError. Either way execution never reaches the
        # response.follow() call below.
        next_page = ('http://tripadvisor.com' + response.css('a.nav').attrib['href']).extract()
        # Earlier attempts, left commented out:
        ##next_page = response.xpath('//a[contains(text(), "Next")]/@href).extract())
        #next_page = ('http://tripadvisor.com' + response.css('a:contains("Next")').attrib['href'].extract())
        # This None check cannot trigger: the assignment above either raises or
        # produces a value, it never yields None.
        if next_page is not None:
            yield response.follow(next_page, callback=self.parse)

@djmystica,下面的代码现在可以正常工作:

import scrapy
class TripadvSpider(scrapy.Spider):
    """Crawl a TripAdvisor restaurant listing, following the "Next" link.

    Each listing page yields one item per ``div.emrzT`` element with the
    restaurant's name and its (possibly relative) link. Crawling stops when
    a page has no anchor whose text contains "Next".

    NOTE(review): the CSS class names (``emrzT``, ``bHGqj``) look
    auto-generated and may change; verify them against the live markup.
    """

    name = 'tripadv'

    allowed_domains = ['tripadvisor.com']
    start_urls = [
        'https://www.tripadvisor.com/Restaurants-g60795-oa0-Philadelphia_Pennsylvania.html#EATERY_LIST_CONTENTS']

    def parse(self, response):
        """Yield restaurant items from ``response`` and schedule the next page.

        Args:
            response: The Scrapy response for one listing page.

        Yields:
            A dict with ``'name'`` and ``'link'`` (each a string or ``None``)
            for every ``div.emrzT`` element, followed by a request for the
            next page when a "Next" link is present.
        """
        for store in response.css('div.emrzT'):
            yield {
                'name': store.css('a.bHGqj::text').extract_first(),
                'link': store.css('a.bHGqj').xpath('@href').extract_first(),
            }

        next_page = response.xpath(
            '//a[contains(text(), "Next")]/@href').extract_first()
        # Test the extracted href itself: a URL built with an f-string is never
        # None, so the old check would request ".../None" on the last page.
        if next_page is not None:
            # response.follow resolves relative hrefs against response.url,
            # so no manual host prefix is needed.
            yield response.follow(next_page, callback=self.parse)