如何在使用 async 而不是 inline_requests 时,在 parse 方法中发出 POST 请求?
How to issue a POST request within the parse method while using async instead of inline_requests?
我一直在尝试使用 async 来摆脱 parse 方法中的额外回调。我知道有一个库 inline_requests 可以做到这一点。
但是,我希望坚持使用 async。我无法理解的是如何在 parse 方法中发出 POST 请求。
当我使用 inline_requests 发出 post 请求时,我获得了成功:
import scrapy
from inline_requests import inline_requests
class HkexNewsSpider(scrapy.Spider):
    """Spider that submits the search form inline via ``inline_requests``.

    The start page is fetched first; ``parse_item`` then re-posts the page's
    form fields and reads the total from the response to that POST, all
    within a single callback.
    """

    name = "hkexnews"
    start_url = "http://www.hkexnews.hk/sdw/search/searchsdw.aspx"

    def start_requests(self):
        """Request the search page and route its response to ``parse_item``."""
        yield scrapy.Request(self.start_url, callback=self.parse_item)

    @inline_requests
    def parse_item(self, response):
        """Post the search form and yield the value found in the result page.

        Yields a ``FormRequest`` first (whose response is assigned back to
        ``resp`` by the ``inline_requests`` decorator) and then a single item
        of the form ``{"Total Value": <text or None>}``.
        """
        # Seed the payload with the name/value of every input[name] element
        # on the page, then override the fields that drive the search.
        payload = {
            item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
            for item in response.css("input[name]")
        }
        payload['__EVENTTARGET'] = 'btnSearch'
        payload['txtStockCode'] = '00001'
        payload['txtParticipantID'] = 'A00001'

        # dont_filter=True: the POST targets the same URL that was already
        # requested in start_requests.
        resp = yield scrapy.FormRequest(self.start_url, formdata=payload, dont_filter=True)
        total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
        yield {"Total Value": total_value}
在尝试使用异步发出 post 请求时,我得到 None 作为结果:
async def parse(self, response):
    """Try to post the search form inline by awaiting the engine download.

    This is the attempt that the accompanying text reports as producing
    ``None`` for "Total Value". The request construction is intentionally
    kept exactly as asked about so the snippet still shows the problem.
    """
    # Seed the payload with the name/value of every input[name] element
    # on the page, then override the fields that drive the search.
    payload = {
        item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
        for item in response.css("input[name]")
    }
    payload['__EVENTTARGET'] = 'btnSearch'
    payload['txtStockCode'] = '00001'
    payload['txtParticipantID'] = 'A00001'

    # NOTE(review): ``payload`` is a dict but is handed to ``body=`` here, so
    # it is not sent as form-encoded fields; this is the likely reason the
    # selector below finds nothing. A form submission would go through
    # ``scrapy.FormRequest(..., formdata=payload)`` instead -- confirm.
    request = response.follow(self.start_url, method='POST', body=payload, dont_filter=True)
    resp = await self.crawler.engine.download(request, self)
    total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
    yield {"Total Value": total_value}
How can I fetch the result using the latter approach?
import scrapy
class HkexNewsSpider(scrapy.Spider):
    """Spider that posts the search form from inside an ``async`` ``parse``.

    No extra callback is used: the POST is built as a ``FormRequest`` and
    awaited directly through the crawler's engine.
    """

    name = "hkexnews"
    start_urls = ['http://www.hkexnews.hk/sdw/search/searchsdw.aspx']

    async def parse(self, response):
        """Post the search form and yield ``{"Total Value": <text or None>}``."""
        # Seed the payload with the name/value of every input[name] element
        # on the page, then override the fields that drive the search.
        payload = {
            item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
            for item in response.css("input[name]")
        }
        payload['__EVENTTARGET'] = 'btnSearch'
        payload['txtStockCode'] = '00001'
        payload['txtParticipantID'] = 'A00001'

        # The dict goes in through ``formdata=`` on a FormRequest rather than
        # through ``body=`` on a plain request. dont_filter=True because the
        # POST targets the same URL as the start request.
        request = scrapy.FormRequest(self.start_urls[0], formdata=payload, dont_filter=True)
        # NOTE(review): passing the spider as a second argument to
        # ``engine.download`` appears to be deprecated in newer Scrapy
        # releases -- confirm against the installed version.
        resp = await self.crawler.engine.download(request, self)
        total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
        yield {"Total Value": total_value}
输出:
{'Total Value': '2,546,531,648'}
我一直在尝试使用 async 来摆脱 parse 方法中的额外回调。我知道有一个库 inline_requests 可以做到这一点。
但是,我希望坚持使用 async。我无法理解的是如何在 parse 方法中发出 POST 请求。
当我使用 inline_requests 发出 post 请求时,我获得了成功:
import scrapy
from inline_requests import inline_requests
class HkexNewsSpider(scrapy.Spider):
    """Spider that submits the search form inline via ``inline_requests``.

    The start page is fetched first; ``parse_item`` then re-posts the page's
    form fields and reads the total from the response to that POST, all
    within a single callback.
    """

    name = "hkexnews"
    start_url = "http://www.hkexnews.hk/sdw/search/searchsdw.aspx"

    def start_requests(self):
        """Request the search page and route its response to ``parse_item``."""
        yield scrapy.Request(self.start_url, callback=self.parse_item)

    @inline_requests
    def parse_item(self, response):
        """Post the search form and yield the value found in the result page.

        Yields a ``FormRequest`` first (whose response is assigned back to
        ``resp`` by the ``inline_requests`` decorator) and then a single item
        of the form ``{"Total Value": <text or None>}``.
        """
        # Seed the payload with the name/value of every input[name] element
        # on the page, then override the fields that drive the search.
        payload = {
            item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
            for item in response.css("input[name]")
        }
        payload['__EVENTTARGET'] = 'btnSearch'
        payload['txtStockCode'] = '00001'
        payload['txtParticipantID'] = 'A00001'

        # dont_filter=True: the POST targets the same URL that was already
        # requested in start_requests.
        resp = yield scrapy.FormRequest(self.start_url, formdata=payload, dont_filter=True)
        total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
        yield {"Total Value": total_value}
在尝试使用异步发出 post 请求时,我得到 None 作为结果:
async def parse(self, response):
    """Try to post the search form inline by awaiting the engine download.

    This is the attempt that the accompanying text reports as producing
    ``None`` for "Total Value". The request construction is intentionally
    kept exactly as asked about so the snippet still shows the problem.
    """
    # Seed the payload with the name/value of every input[name] element
    # on the page, then override the fields that drive the search.
    payload = {
        item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
        for item in response.css("input[name]")
    }
    payload['__EVENTTARGET'] = 'btnSearch'
    payload['txtStockCode'] = '00001'
    payload['txtParticipantID'] = 'A00001'

    # NOTE(review): ``payload`` is a dict but is handed to ``body=`` here, so
    # it is not sent as form-encoded fields; this is the likely reason the
    # selector below finds nothing. A form submission would go through
    # ``scrapy.FormRequest(..., formdata=payload)`` instead -- confirm.
    request = response.follow(self.start_url, method='POST', body=payload, dont_filter=True)
    resp = await self.crawler.engine.download(request, self)
    total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
    yield {"Total Value": total_value}
How can I fetch the result using the latter approach?
import scrapy
class HkexNewsSpider(scrapy.Spider):
    """Spider that posts the search form from inside an ``async`` ``parse``.

    No extra callback is used: the POST is built as a ``FormRequest`` and
    awaited directly through the crawler's engine.
    """

    name = "hkexnews"
    start_urls = ['http://www.hkexnews.hk/sdw/search/searchsdw.aspx']

    async def parse(self, response):
        """Post the search form and yield ``{"Total Value": <text or None>}``."""
        # Seed the payload with the name/value of every input[name] element
        # on the page, then override the fields that drive the search.
        payload = {
            item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
            for item in response.css("input[name]")
        }
        payload['__EVENTTARGET'] = 'btnSearch'
        payload['txtStockCode'] = '00001'
        payload['txtParticipantID'] = 'A00001'

        # The dict goes in through ``formdata=`` on a FormRequest rather than
        # through ``body=`` on a plain request. dont_filter=True because the
        # POST targets the same URL as the start request.
        request = scrapy.FormRequest(self.start_urls[0], formdata=payload, dont_filter=True)
        # NOTE(review): passing the spider as a second argument to
        # ``engine.download`` appears to be deprecated in newer Scrapy
        # releases -- confirm against the installed version.
        resp = await self.crawler.engine.download(request, self)
        total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
        yield {"Total Value": total_value}
输出:
{'Total Value': '2,546,531,648'}