使用 Scrapy(Python)通过 POST 请求获取 JSON 响应
Getting json response using POST Request using scrapy python
我正在尝试通过 POST 请求从这个网站获取数据。我在该网站上找到了 POST 请求的 URL,但用 Scrapy 发送请求时却没有得到与网站上相同的响应。
这是我的代码:
import scrapy
from scrapy.http import request
from scrapy.http.request.form import FormRequest
from scrapy.http import FormRequest
import json
class CodeSpider(scrapy.Spider):
    """Spider that sends one POST request to the GetDiamonds.aspx endpoint.

    The search query is serialized to JSON and sent as the request body;
    the raw response is written to ``test.json``.
    """

    name = 'code'
    # Must match the host actually requested, otherwise Scrapy's offsite
    # filtering can drop the request.
    allowed_domains = ['technet.rapaport.com']
    start_urls = ['https://technet.rapaport.com/HTTP/JSON/RetailFeed/GetDiamonds.aspx']

    def start_requests(self):
        """Yield the single POST request carrying the JSON-encoded query."""
        form_data = {
            "request": {
                "header": {
                    "raplink_access_key": "e7d7d61946804c579d02dab565371113",
                    "domain": "www.sarvadajewels.com",
                },
                "body": {
                    "search_type": "white",
                    "shapes": ["round"],
                    "size_from": 0.1,
                    "size_to": 100,
                    "color_from": "D",
                    "color_to": "M",
                    "clarity_from": "IF",
                    "clarity_to": "I1",
                    "cut_from": "Excellent",
                    "cut_to": "Poor",
                    "polish_from": "Excellent",
                    "polish_to": "Poor",
                    "symmetry_from": "Excellent",
                    "symmetry_to": "Poor",
                    "labs": [],
                    "fancy_colors": [],
                    "price_total_from": 0,
                    "price_total_to": 7428404930,
                    "page_number": 2,
                    "page_size": "60",
                    "sort_by": "price",
                    "sort_direction": "asc",
                    "currency_code": "INR",
                },
            }
        }
        request_body = json.dumps(form_data)
        yield scrapy.Request(
            'https://technet.rapaport.com/HTTP/JSON/RetailFeed/GetDiamonds.aspx',
            method="POST",
            body=request_body,
            # NOTE: with this Content-Type the endpoint replies with
            # error_code 1001 ("Invalid format").
            headers={'Content-Type': 'application/json; charset=UTF-8'},
            callback=self.parse,
        )

    def parse(self, response):
        """Write the raw response body to ``test.json``."""
        # Write the bytes as-is: str(response.body) would store the
        # "b'...'" repr of the bytes object instead of the JSON text.
        with open('test.json', 'wb') as file:
            file.write(response.body)
我遇到了这个错误:
{'response': {'header': {'error_code': 1001, 'error_message': 'Invalid format'
}, 'body': {}
}
Is there any way to get this data?
您使用的是错误的 header:
{'Content-Type': 'application/json; charset=UTF-8'}
应该是:
{'Content-Type': 'application/x-www-form-urlencoded'}
完整代码:
import scrapy
from scrapy.http import request
from scrapy.http.request.form import FormRequest
from scrapy.http import FormRequest
import json
class CodeSpider(scrapy.Spider):
    """Spider that sends one POST request to the GetDiamonds.aspx endpoint.

    The search query is serialized to JSON and sent as the request body
    with a form-urlencoded Content-Type; the decoded JSON response is
    yielded as a single item.
    """

    name = 'code'
    # Must match the host actually requested, otherwise Scrapy's offsite
    # filtering can drop the request.
    allowed_domains = ['technet.rapaport.com']
    start_urls = ['https://technet.rapaport.com/HTTP/JSON/RetailFeed/GetDiamonds.aspx']

    def start_requests(self):
        """Yield the single POST request carrying the JSON-encoded query."""
        form_data = {
            "request": {
                "header": {
                    "raplink_access_key": "e7d7d61946804c579d02dab565371113",
                    "domain": "www.sarvadajewels.com",
                },
                "body": {
                    "search_type": "white",
                    "shapes": ["round"],
                    "size_from": 0.1,
                    "size_to": 100,
                    "color_from": "D",
                    "color_to": "M",
                    "clarity_from": "IF",
                    "clarity_to": "I1",
                    "cut_from": "Excellent",
                    "cut_to": "Poor",
                    "polish_from": "Excellent",
                    "polish_to": "Poor",
                    "symmetry_from": "Excellent",
                    "symmetry_to": "Poor",
                    "labs": [],
                    "fancy_colors": [],
                    "price_total_from": 0,
                    "price_total_to": 7428404930,
                    "page_number": 2,
                    "page_size": "60",
                    "sort_by": "price",
                    "sort_direction": "asc",
                    "currency_code": "INR",
                },
            }
        }
        request_body = json.dumps(form_data)
        yield scrapy.Request(
            'https://technet.rapaport.com/HTTP/JSON/RetailFeed/GetDiamonds.aspx',
            method="POST",
            body=request_body,
            # The body is JSON text, but it is sent under the
            # form-urlencoded Content-Type.
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            callback=self.parse,
        )

    def parse(self, response):
        """Decode the JSON response and yield it as one item."""
        yield json.loads(response.text)
此外,Scrapy 支持通过 -o 标志把抓取到的 item 导出为不同格式的文件,因此可以用它来代替手动用 Python 写文件。试试:
scrapy runspider <spider_file.py> -o test.json
或
scrapy crawl code -o test.json
我正在尝试通过 POST 请求从这个网站获取数据。我在该网站上找到了 POST 请求的 URL,但用 Scrapy 发送请求时却没有得到与网站上相同的响应。
这是我的代码:
import scrapy
from scrapy.http import request
from scrapy.http.request.form import FormRequest
from scrapy.http import FormRequest
import json
class CodeSpider(scrapy.Spider):
    """Spider that sends one POST request to the GetDiamonds.aspx endpoint.

    The search query is serialized to JSON and sent as the request body;
    the raw response is written to ``test.json``.
    """

    name = 'code'
    # Must match the host actually requested, otherwise Scrapy's offsite
    # filtering can drop the request.
    allowed_domains = ['technet.rapaport.com']
    start_urls = ['https://technet.rapaport.com/HTTP/JSON/RetailFeed/GetDiamonds.aspx']

    def start_requests(self):
        """Yield the single POST request carrying the JSON-encoded query."""
        form_data = {
            "request": {
                "header": {
                    "raplink_access_key": "e7d7d61946804c579d02dab565371113",
                    "domain": "www.sarvadajewels.com",
                },
                "body": {
                    "search_type": "white",
                    "shapes": ["round"],
                    "size_from": 0.1,
                    "size_to": 100,
                    "color_from": "D",
                    "color_to": "M",
                    "clarity_from": "IF",
                    "clarity_to": "I1",
                    "cut_from": "Excellent",
                    "cut_to": "Poor",
                    "polish_from": "Excellent",
                    "polish_to": "Poor",
                    "symmetry_from": "Excellent",
                    "symmetry_to": "Poor",
                    "labs": [],
                    "fancy_colors": [],
                    "price_total_from": 0,
                    "price_total_to": 7428404930,
                    "page_number": 2,
                    "page_size": "60",
                    "sort_by": "price",
                    "sort_direction": "asc",
                    "currency_code": "INR",
                },
            }
        }
        request_body = json.dumps(form_data)
        yield scrapy.Request(
            'https://technet.rapaport.com/HTTP/JSON/RetailFeed/GetDiamonds.aspx',
            method="POST",
            body=request_body,
            # NOTE: with this Content-Type the endpoint replies with
            # error_code 1001 ("Invalid format").
            headers={'Content-Type': 'application/json; charset=UTF-8'},
            callback=self.parse,
        )

    def parse(self, response):
        """Write the raw response body to ``test.json``."""
        # Write the bytes as-is: str(response.body) would store the
        # "b'...'" repr of the bytes object instead of the JSON text.
        with open('test.json', 'wb') as file:
            file.write(response.body)
我遇到了这个错误:
{'response': {'header': {'error_code': 1001, 'error_message': 'Invalid format'
}, 'body': {}
}
Is there any way to get this data?
您使用的是错误的 header:
{'Content-Type': 'application/json; charset=UTF-8'}
应该是:
{'Content-Type': 'application/x-www-form-urlencoded'}
完整代码:
import scrapy
from scrapy.http import request
from scrapy.http.request.form import FormRequest
from scrapy.http import FormRequest
import json
class CodeSpider(scrapy.Spider):
    """Spider that sends one POST request to the GetDiamonds.aspx endpoint.

    The search query is serialized to JSON and sent as the request body
    with a form-urlencoded Content-Type; the decoded JSON response is
    yielded as a single item.
    """

    name = 'code'
    # Must match the host actually requested, otherwise Scrapy's offsite
    # filtering can drop the request.
    allowed_domains = ['technet.rapaport.com']
    start_urls = ['https://technet.rapaport.com/HTTP/JSON/RetailFeed/GetDiamonds.aspx']

    def start_requests(self):
        """Yield the single POST request carrying the JSON-encoded query."""
        form_data = {
            "request": {
                "header": {
                    "raplink_access_key": "e7d7d61946804c579d02dab565371113",
                    "domain": "www.sarvadajewels.com",
                },
                "body": {
                    "search_type": "white",
                    "shapes": ["round"],
                    "size_from": 0.1,
                    "size_to": 100,
                    "color_from": "D",
                    "color_to": "M",
                    "clarity_from": "IF",
                    "clarity_to": "I1",
                    "cut_from": "Excellent",
                    "cut_to": "Poor",
                    "polish_from": "Excellent",
                    "polish_to": "Poor",
                    "symmetry_from": "Excellent",
                    "symmetry_to": "Poor",
                    "labs": [],
                    "fancy_colors": [],
                    "price_total_from": 0,
                    "price_total_to": 7428404930,
                    "page_number": 2,
                    "page_size": "60",
                    "sort_by": "price",
                    "sort_direction": "asc",
                    "currency_code": "INR",
                },
            }
        }
        request_body = json.dumps(form_data)
        yield scrapy.Request(
            'https://technet.rapaport.com/HTTP/JSON/RetailFeed/GetDiamonds.aspx',
            method="POST",
            body=request_body,
            # The body is JSON text, but it is sent under the
            # form-urlencoded Content-Type.
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            callback=self.parse,
        )

    def parse(self, response):
        """Decode the JSON response and yield it as one item."""
        yield json.loads(response.text)
此外,Scrapy 支持通过 -o 标志把抓取到的 item 导出为不同格式的文件,因此可以用它来代替手动用 Python 写文件。试试:
scrapy runspider <spider_file.py> -o test.json
或
scrapy crawl code -o test.json