无法从 scrapy 中的 link 获得任何请求
Not able to get any request from a link in scrapy
这里的问题是:它无法从 final_url 获得任何请求,并且无法从该 link 获得营业时间数据。
# Spider from the question, reproduced exactly as posted. The indentation was
# lost in the paste, so the intended nesting of the statements below cannot be
# recovered from this text alone.
class YellSpider(scrapy.Spider):
name = 'yell'
# URL template: the listing's href is substituted for the {} placeholder.
base_url = 'https://www.yell.com{}'
start_urls = ['https://www.yell.com/ucs/UcsSearchAction.do?scrambleSeed=770796459&keywords=hospitals&location=united+kingdom']
def parse(self, response):
# Walk every element matched by the listing-row selector.
for data in response.css('div.row.businessCapsule--mainRow'):
title = data.css('.text-h2::text').get()
business_url = data.css('a.businessCapsule--title::attr(href)').get()
final_url = self.base_url.format(business_url)
# NOTE(review): this selects from `response`, not from the row `data`, so
# the same first match on the page is read on every iteration - confirm intent.
avg_rating = response.css('span.starRating--average::text').get()
def parse_site(self,response):
# NOTE(review): the Request is only bound to `req`; nothing in this block
# yields or returns it, and `req` is never used again.
req = scrapy.Request(final_url, callback=self.parse_site)
# NOTE(review): .strip() is called directly on the result of .get();
# presumably this fails when the selector matches nothing - verify.
opening_hours = response.css('strong::text').get().strip()
# Item assembled from the listing fields plus the opening hours.
items= {
'Title': title ,
'Title Url' : final_url,
'Average Rating': avg_rating,
'Hours': opening_hours
}
yield items
pass
以下代码应该可以解决您遇到的问题。事实上,您没有向 parse_site 方法发送任何请求,
这就是您的请求未被处理的原因。
class YellSpider(scrapy.Spider):
    """Scrape hospital listings from a yell.com search-results page.

    ``parse`` walks the listing rows of the search page and schedules one
    request per business page. ``parse_site`` receives that page together
    with the fields already read from the listing row (passed through
    ``cb_kwargs``) and yields the finished item.
    """

    name = 'yell'
    # URL template: the listing's href is substituted for the {} placeholder.
    base_url = 'https://www.yell.com{}'
    start_urls = ['https://www.yell.com/ucs/UcsSearchAction.do?scrambleSeed=770796459&keywords=hospitals&location=united+kingdom']

    def parse(self, response):
        """Yield one business-page request per listing row.

        Args:
            response: The search-results response.

        Yields:
            scrapy.Request: A request for the business page, with the row's
            title, URL and average rating attached as callback kwargs.
        """
        for data in response.css('div.row.businessCapsule--mainRow'):
            business_url = data.css('a.businessCapsule--title::attr(href)').get()
            if not business_url:
                # Without an href, formatting None into base_url would
                # request "https://www.yell.comNone"; skip the row instead.
                continue

            title = data.css('.text-h2::text').get()
            final_url = self.base_url.format(business_url)
            # Select inside the row, not the whole response: a page-wide
            # query returns the first rating on the page for every listing.
            avg_rating = data.css('span.starRating--average::text').get()

            yield scrapy.Request(
                final_url,
                callback=self.parse_site,
                cb_kwargs={
                    "title": title,
                    "final_url": final_url,
                    "avg_rating": avg_rating,
                },
            )

    def parse_site(self, response, title, final_url, avg_rating):
        """Build the final item from a business page.

        Args:
            response: The business-page response.
            title: Listing title read from the search page.
            final_url: URL of the business page.
            avg_rating: Average rating read from the search page, or None.

        Yields:
            dict: The item with title, URL, rating and opening hours.
        """
        opening_hours = response.css('strong::text').get()
        # .get() gives None when nothing matches; fall back to "" so that
        # a page without the element does not raise on .strip().
        opening_hours = opening_hours.strip() if opening_hours else ""
        items = {
            'Title': title,
            'Title Url': final_url,
            'Average Rating': avg_rating,
            'Hours': opening_hours,
        }
        yield items
这里的问题是:它无法从 final_url 获得任何请求,并且无法从该 link 获得营业时间数据。
class YellSpider(scrapy.Spider):
# Body of the spider from the question, reproduced exactly as posted. The
# indentation was lost in the paste, so the intended nesting of the
# statements below cannot be recovered from this text alone.
name = 'yell'
# URL template: the listing's href is substituted for the {} placeholder.
base_url = 'https://www.yell.com{}'
start_urls = ['https://www.yell.com/ucs/UcsSearchAction.do?scrambleSeed=770796459&keywords=hospitals&location=united+kingdom']
def parse(self, response):
# Walk every element matched by the listing-row selector.
for data in response.css('div.row.businessCapsule--mainRow'):
title = data.css('.text-h2::text').get()
business_url = data.css('a.businessCapsule--title::attr(href)').get()
final_url = self.base_url.format(business_url)
# NOTE(review): this selects from `response`, not from the row `data`, so
# the same first match on the page is read on every iteration - confirm intent.
avg_rating = response.css('span.starRating--average::text').get()
def parse_site(self,response):
# NOTE(review): the Request is only bound to `req`; nothing in this block
# yields or returns it, and `req` is never used again.
req = scrapy.Request(final_url, callback=self.parse_site)
# NOTE(review): .strip() is called directly on the result of .get();
# presumably this fails when the selector matches nothing - verify.
opening_hours = response.css('strong::text').get().strip()
# Item assembled from the listing fields plus the opening hours.
items= {
'Title': title ,
'Title Url' : final_url,
'Average Rating': avg_rating,
'Hours': opening_hours
}
yield items
pass
以下代码应该可以解决您遇到的问题。事实上,您没有向 parse_site 方法发送任何请求,
这就是您的请求未被处理的原因。
class YellSpider(scrapy.Spider):
    """Scrape hospital listings from a yell.com search-results page.

    ``parse`` walks the listing rows of the search page and schedules one
    request per business page. ``parse_site`` receives that page together
    with the fields already read from the listing row (passed through
    ``cb_kwargs``) and yields the finished item.
    """

    name = 'yell'
    # URL template: the listing's href is substituted for the {} placeholder.
    base_url = 'https://www.yell.com{}'
    start_urls = ['https://www.yell.com/ucs/UcsSearchAction.do?scrambleSeed=770796459&keywords=hospitals&location=united+kingdom']

    def parse(self, response):
        """Yield one business-page request per listing row.

        Args:
            response: The search-results response.

        Yields:
            scrapy.Request: A request for the business page, with the row's
            title, URL and average rating attached as callback kwargs.
        """
        for data in response.css('div.row.businessCapsule--mainRow'):
            business_url = data.css('a.businessCapsule--title::attr(href)').get()
            if not business_url:
                # Without an href, formatting None into base_url would
                # request "https://www.yell.comNone"; skip the row instead.
                continue

            title = data.css('.text-h2::text').get()
            final_url = self.base_url.format(business_url)
            # Select inside the row, not the whole response: a page-wide
            # query returns the first rating on the page for every listing.
            avg_rating = data.css('span.starRating--average::text').get()

            yield scrapy.Request(
                final_url,
                callback=self.parse_site,
                cb_kwargs={
                    "title": title,
                    "final_url": final_url,
                    "avg_rating": avg_rating,
                },
            )

    def parse_site(self, response, title, final_url, avg_rating):
        """Build the final item from a business page.

        Args:
            response: The business-page response.
            title: Listing title read from the search page.
            final_url: URL of the business page.
            avg_rating: Average rating read from the search page, or None.

        Yields:
            dict: The item with title, URL, rating and opening hours.
        """
        opening_hours = response.css('strong::text').get()
        # .get() gives None when nothing matches; fall back to "" so that
        # a page without the element does not raise on .strip().
        opening_hours = opening_hours.strip() if opening_hours else ""
        items = {
            'Title': title,
            'Title Url': final_url,
            'Average Rating': avg_rating,
            'Hours': opening_hours,
        }
        yield items