加载一系列负载请求并为每个请求执行分页
Load a series of payload requests and perform pagination for each one of them
您好,我正在尝试发送一系列负载请求,并为每个负载请求执行分页。为了实现这一点,每个“有效负载请求”都应该增加一个变量“offset”。不幸的是,代码输出以下错误“TypeError:'bytes' object does not support item assignment”。
# -*- coding: utf-8 -*-
import scrapy
import json
class KauflandBasicProductsSpider(scrapy.Spider):
name = 'kaufland_basic_products'
allowed_domains = ['www.shopme.io']
custom_settings = {'ITEM_PIPELINES': {'groceries.pipelines.BasicProducts': 365}}
categories = [
"8d7a9abf-b90b-4c07-9e18-ed2283dfd71f",
"24ddb04a-f9b9-44f8-b78d-00ef5cd79977",
"3502a7bd-7459-4a51-91df-17375b15e03e"
]
def start_requests(self):
for category_id in self.categories:
payload = {"category_id": category_id}
yield scrapy.Request(
url='www.shopme.io/v1/feed',
)
def parse(self, response):
payload_var = response.request.body
# offset_var = int(payload_var['offset'])
resp = json.loads(response.body)
# print(resp)
products = resp.get('feed').get('items')[0].get('items')
# # print(products)
for product in products:
yield{
'product' : product.get('name'),
'price' : product.get('price'),
'price_promo' : 'n/a',
'weight' : 'n/a',
'weight_text' : 'n/a',
# 'brand' : product.get('brand').get('name'),
'country_of_origin' : 'n/a',
'source' : product.get('vendor_name'),
'link' : product.get('image_url')
# 'current_page' : response.meta['current_page']
# 'user-agent' : response.get('User-Agent').decode('utf-8')
}
count_available_products = resp.get('feed').get('count')
# increment_number = len(resp.get('feed').get('items')[0].get('items'))
# if current_payload['offset'] <= count_available_products:
# current_payload['offset'] += increment_number
if count_available_products >= 12:
# offset_var += 12
offset_var = response.meta['offset']
offset_var += 12
payload_var['offset'] = offset_var
yield scrapy.Request(
url="https://disco.deliveryhero.io/verticals/api/v1/feed",
method="POST",
body=json.dumps(payload_var),
headers={
'Content-Type': 'application/json'
},
callback=self.parse
)
您的问题的答案是,当您检索请求正文时,您有一个字节对象,而不是字典。这就是为什么你会得到 TypeError
异常。
payload_var = response.request.body # This is a bytes object
...
payload_var['offset'] = offset_var # This raises an exception
解决方案是使用 json.loads()
方法将其转换回字典。
payload_var = json.loads(response.request.body)
还有其他选项可以避免此错误,使用 payload
作为 class 变量(就像 categories
一样)就是其中之一。
您好,我正在尝试发送一系列负载请求,并为每个负载请求执行分页。为了实现这一点,每个“有效负载请求”都应该增加一个变量“offset”。不幸的是,代码输出以下错误“TypeError:'bytes' object does not support item assignment”。
# -*- coding: utf-8 -*-
import scrapy
import json
class KauflandBasicProductsSpider(scrapy.Spider):
name = 'kaufland_basic_products'
allowed_domains = ['www.shopme.io']
custom_settings = {'ITEM_PIPELINES': {'groceries.pipelines.BasicProducts': 365}}
categories = [
"8d7a9abf-b90b-4c07-9e18-ed2283dfd71f",
"24ddb04a-f9b9-44f8-b78d-00ef5cd79977",
"3502a7bd-7459-4a51-91df-17375b15e03e"
]
def start_requests(self):
for category_id in self.categories:
payload = {"category_id": category_id}
yield scrapy.Request(
url='www.shopme.io/v1/feed',
)
def parse(self, response):
payload_var = response.request.body
# offset_var = int(payload_var['offset'])
resp = json.loads(response.body)
# print(resp)
products = resp.get('feed').get('items')[0].get('items')
# # print(products)
for product in products:
yield{
'product' : product.get('name'),
'price' : product.get('price'),
'price_promo' : 'n/a',
'weight' : 'n/a',
'weight_text' : 'n/a',
# 'brand' : product.get('brand').get('name'),
'country_of_origin' : 'n/a',
'source' : product.get('vendor_name'),
'link' : product.get('image_url')
# 'current_page' : response.meta['current_page']
# 'user-agent' : response.get('User-Agent').decode('utf-8')
}
count_available_products = resp.get('feed').get('count')
# increment_number = len(resp.get('feed').get('items')[0].get('items'))
# if current_payload['offset'] <= count_available_products:
# current_payload['offset'] += increment_number
if count_available_products >= 12:
# offset_var += 12
offset_var = response.meta['offset']
offset_var += 12
payload_var['offset'] = offset_var
yield scrapy.Request(
url="https://disco.deliveryhero.io/verticals/api/v1/feed",
method="POST",
body=json.dumps(payload_var),
headers={
'Content-Type': 'application/json'
},
callback=self.parse
)
您的问题的答案是,当您检索请求正文时,您有一个字节对象,而不是字典。这就是为什么你会得到 TypeError
异常。
payload_var = response.request.body # This is a bytes object
...
payload_var['offset'] = offset_var # This raises an exception
解决方案是使用 json.loads()
方法将其转换回字典。
payload_var = json.loads(response.request.body)
还有其他选项可以避免此错误,使用 payload
作为 class 变量(就像 categories
一样)就是其中之一。