如何在 scrapy 中执行 POST 方法?
how to perform POST method in scrapy?
请问有人能说明如何对下面这个 URL 执行 POST 请求吗?
https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture
1) 加载上面的 URL 时,页面会向下面的 POST URL 发送如下表单数据(formdata):
Post Url = https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search
表单数据=
{"filters":{},"brandFilter":null,"sellersFilter":null,"catgroupId":"34790","levelOne":null,"searchMode"
:"BROWSE","sortBy":"RECOMMENDED","keyword":"south%20shore%20furniture","pageNum":1,"rowCount":120,"ffmMode"
:"ALL","priceFilter":null,"hideOOS":true,"uNo":"4848","session":{"guid":0,"emailId":"","sessionKey":"da9d76bd-bd4e-11e6-8e27-00505699251d"
,"userId":6026228,"appId":"MYGOFER"},"security":{"src":"web","ts":"2016-12-08T14:01:57.619Z","authToken"
:""}}
2) 我已经把这个 POST URL 和表单数据传给了 FormRequest,但没有得到任何响应。
import scrapy
from scrapy.http import FormRequest
class MygoferDSpider(scrapy.Spider):
    """Spider from the question: POST a search query to the mygofer.com API.

    It issues a single FormRequest to the search endpoint and writes whatever
    comes back to ``mygofer.txt``. The request is kept exactly as the asker
    wrote it (JSON text passed as ``formdata``, nulls quoted as ``"null"``),
    because that is the call the question reports as returning no response.
    """

    name = "mygofer_d"
    allowed_domains = ["mygofer.com"]
    # Not referenced by start_requests() below, which builds its own request.
    start_urls = ['https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture']

    def start_requests(self):
        """Return a one-element list holding the POST request to the search endpoint."""
        # NOTE(review): an already JSON-encoded document is handed over as
        # ``formdata`` and its nulls are quoted as "null"; this is the
        # behaviour the question is asking about, so it is left untouched.
        # The continuation lines of the literal stay at column 0 so the string
        # content is byte-for-byte what the asker posted.
        return [FormRequest("https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search",
                            formdata='''{"filters":{},
"brandFilter":"null",
"sellersFilter":"null",
"catgroupId":"34790",
"levelOne":"null",
"searchMode":"BROWSE",
"sortBy":"RECOMMENDED",
"keyword":"south%20shore%20furniture",
"pageNum":"1",
"rowCount":"120",
"ffmMode":"ALL",
"priceFilter":"null",
"hideOOS":"true",
"uNo":"4848",
"session":{"guid":"0",
"emailId":"",
"sessionKey":"fcd3bcd1-b7bf-11e6-8e27-00505699251d",
"userId":"5970776",
"appId":"MYGOFER"},
"security":{"src":"web",
"ts":"2016-12-01T12:58:28.994Z",
"authToken":""}}''',
                            callback=self.parse)]

    # NOTE(review): per Scrapy's HttpErrorMiddleware documentation this lets
    # responses with HTTP status 415 reach the callback instead of being
    # filtered out -- confirm against the Scrapy version in use.
    handle_httpstatus_list = [415]

    def parse(self, response):
        """Print the response URL and dump the raw response body to mygofer.txt."""
        # Single-argument print() produces the same output on Python 2 and 3.
        print("+++++ %s" % response.url)
        with open("mygofer.txt", "wb") as v:
            v.write(response.body)
有几点需要注意:
- 您要发送的请求正文已经是 JSON 编码的,因此应使用 body 参数,而不是 formdata(后者用于把键/值对进行 URL 编码)
- 您需要指明 HTTP 请求正文的 Content-Type(我的 Chrome 浏览器发送的是 Content-Type: application/json;charset=UTF-8)
- 显然,该网站无法正确处理字符串 "null",请改用 JSON 的 null
示例 shell 会话:
$ scrapy shell -s USER_AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36" 'https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture'
(...)
>>> frq = scrapy.FormRequest("https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search",
... method="POST",
... body='''{"filters":{},
... "brandFilter":null,
... "sellersFilter":null,
... "catgroupId":"34790",
... "levelOne":null,
... "searchMode":"BROWSE",
... "sortBy":"RECOMMENDED",
... "keyword":"south%20shore%20furniture",
... "pageNum":"1",
... "rowCount":"120",
... "ffmMode":"ALL",
... "priceFilter":null,
... "hideOOS":"true",
... "uNo":"4848",
... "session":{"guid":"0",
... "emailId":"",
... "sessionKey":"fcd3bcd1-b7bf-11e6-8e27-00505699251d",
... "userId":"5970776",
... "appId":"MYGOFER"},
... "security":{"src":"web",
... "ts":"2016-12-01T12:58:28.994Z",
... "authToken":""}}''',
... headers={"Content-Type": "application/json;charset=UTF-8",
... "Accept":"application/json, text/plain, */*"})
>>> fetch(frq)
2016-12-08 15:50:26 [scrapy] DEBUG: Crawled (200) <POST https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search> (referer: None)
>>>
>>>
>>> import json
>>> data = json.loads(response.text)
>>> len(data)
3
>>> data.keys()
[u'classType', u'payload', u'userRole']
>>>
>>> from pprint import pprint
>>>
>>> pprint(data)
{u'classType': u'com.shc.ecom.local.search.beans.output.SearchOutput',
u'payload': {u'feature': {},
u'filters': {u'levelThree': [{u'catGpId': u'28371',
u'catGpPath': u'For the Home_Kids Room_Fun Accessories',
u'count': 1,
u'name': u'Fun Accessories',
u'parentLevel': u'Kids Room',
u'seoPath': u'for-the-home-kids-room-fun-accessories'},
...
{u'catGpId': u'1231474854',
u'catGpPath': u'TVs & Electronics_Media Furniture_TV Stands',
u'count': 69,
u'name': u'TV Stands',
u'parentLevel': u'Media Furniture',
u'seoPath': u'tvs-electronics-media-furniture-tv-stands'}],
u'narrowBy': [{u'count': 8,
u'name': u'Double Sided',
u'value': u'Yes'},
{u'count': 4,
u'name': u'Upholstered',
u'value': u'No'},
{u'count': 24,
u'name': u'Mobile',
u'value': u'Yes'},
{u'count': 24,
u'name': u'Fire Resistant',
u'value': u'No'}],
u'otherFilters': {u'Assembly': {u'Assembled': 2,
u'Ready to assemble': 770},
u'Audience': {u'Adult': 262,
u'All ages': 7,
u'Dorm/College': 2,
u'Kids': 351,
u'Teen': 12},
...
u'Width Range (in.)': {u'12 - 24 in.': 8,
u'25 - 36 in.': 106,
u'37 - 48 in.': 32,
u'49 - 60 in.': 70,
u'61 - 72 in.': 4,
u'Less than 12 in.': 2}},
u'priceRanges': [{u'cnt': u'262',
u'high': u'100',
u'low': u'0'},
{u'cnt': u'269',
u'high': u'150',
u'low': u'100'},
{u'cnt': u'251',
u'high': u'200',
u'low': u'150'},
{u'cnt': u'219',
u'high': u'275',
u'low': u'200'},
{u'cnt': u'94',
u'high': u'above',
u'low': u'275'}]},
u'keyword': u'south%20shore%20furniture',
u'levelOne': {u'catGpId': u'34790',
u'catGpPath': u'Furniture',
u'name': u'Furniture',
u'seoPath': u'furniture'},
u'maxPrice': u'2539.19',
u'minPrice': u'12.65',
u'numFound': u'1095',
u'products': [{u'availFFMs': [u'SHIP'],
u'brand': u'South Shore',
u'ffm': u'VD',
u'freeShip': u'0',
u'img': u'http://c.shld.net/rpx/i/s/pi/mp/20571/prod_6578221517?src=http%3A%2F%2Fak1.ostkcdn.com%2Fimages%2Fproducts%2F9810550%2FSouth-Shore-Willow-Twin-Bookcase-Headboard-39-Sumptuous-Cherry-0da3d88a-cb6a-4048-80d4-be464e85da49.jpg&d=8d8fee1e07dc750e2fb7c5711a500bf32278595c',
u'isInCart': False,
u'itemPartNumber': u'SPM9120228717',
u'mailable': u'1',
u'mfpartno': u'3356098-9810550',
u'name': u'South Shore Willow Twin Bookcase Headboard Sumptuous Cherry',
u'partNumber': u'SPM9120228717',
u'prdType': u'NONVARIATION',
u'price': {u'mapViolation': False,
u'pid': u'SPM9120228717'},
u'qtyInCart': 0,
u'rating': 0.0,
u'reviews': 0,
u'salePrice': 87.11,
u'shipStock': u'1',
u'soldBy': u'Overstock.com',
u'solrSalePrice': 87.11,
u'storePrice': False,
u'type': u'NONVARIATION'},
...
{u'availFFMs': [u'SHIP'],
u'brand': u'South Shore',
u'ffm': u'VD',
u'freeShip': u'1',
u'img': u'http://c.shld.net/rpx/i/s/i/spin/image/spin_prod_204451401',
u'isInCart': False,
u'itemPartNumber': u'00827455000',
u'mailable': u'1',
u'mfpartno': u'7250767',
u'name': u'Axess Collection 4-Shelf Bookcasen Pure White',
u'partNumber': u'00827455000P',
u'prdType': u'NONVARIATION',
u'price': {u'clearancePrice': u'0.00',
u'mapViolation': False,
u'pid': u'00827455000',
u'priceType': u'P',
u'promoPrice': u'67.49',
u'regularPrice': u'74.99',
u'salePrice': u'67.49',
u'savings': u'7.5'},
u'qtyInCart': 0,
u'rating': 0.0,
u'reviews': 0,
u'salePrice': 67.49,
u'shipStock': u'1',
u'soldBy': u'Sears',
u'solrSalePrice': 59.71,
u'storePrice': False,
u'type': u'NONVARIATION'}],
u'query': u'http://solrx416p.prod.ch4.s.com:8380/search/select?qt=simpleallsubcat&q=south%20shore%20furniture&wt=json&start=0&rows=120&fq=catalogs:("27151")&fq=level1Cats:("27151_Furniture")&fq=storeAttributes:(!"10175_OUTOFSTOCK_INDICATOR=1")&fq=!(storeAttributes:("10175_DEFAULT_FULFILLMENT=DDC" OR "10175_DEFAULT_FULFILLMENT=KRES" OR "10175_DEFAULT_FULFILLMENT=CRES" OR "10175_DEFAULT_FULFILLMENT=DRES" OR "10175_DEFAULT_FULFILLMENT=SRES" OR "10175_DEFAULT_FULFILLMENT=PLSFS"))&sort=instock desc,fulfillment desc,imageStatus desc,score desc&clientID=MyGofer&sortPrefix=4848~10175&globalPrefix=4848,10175',
u'relevancyRedirect': False,
u'status': u'success',
u'twItems': []},
u'userRole': None}
>>>
虽然已经过去 5 年了,但现在可以使用 scrapy.http.JsonRequest 来发送 JSON 请求体——以下是文档中的示例:
# Example payload that is handed to JsonRequest as its JSON body.
data = {
'name1': 'value1',
'name2': 'value2',
}
# JsonRequest here is scrapy.http.JsonRequest. Because of the `yield`, this
# line has to live inside a generator function (e.g. a spider method).
yield JsonRequest(url='http://www.example.com/post/action', data=data)
请问有人能说明如何对下面这个 URL 执行 POST 请求吗?
https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture
1) 加载上面的 URL 时,页面会向下面的 POST URL 发送如下表单数据(formdata):
Post Url = https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search
表单数据= {"filters":{},"brandFilter":null,"sellersFilter":null,"catgroupId":"34790","levelOne":null,"searchMode":"BROWSE","sortBy":"RECOMMENDED","keyword":"south%20shore%20furniture","pageNum":1,"rowCount":120,"ffmMode":"ALL","priceFilter":null,"hideOOS":true,"uNo":"4848","session":{"guid":0,"emailId":"","sessionKey":"da9d76bd-bd4e-11e6-8e27-00505699251d","userId":6026228,"appId":"MYGOFER"},"security":{"src":"web","ts":"2016-12-08T14:01:57.619Z","authToken":""}}
2) 我已经把这个 POST URL 和表单数据传给了 FormRequest,但没有得到任何响应。
import scrapy
from scrapy.http import FormRequest
class MygoferDSpider(scrapy.Spider):
    """Spider from the question: POST a search query to the mygofer.com API.

    It issues a single FormRequest to the search endpoint and writes whatever
    comes back to ``mygofer.txt``. The request is kept exactly as the asker
    wrote it (JSON text passed as ``formdata``, nulls quoted as ``"null"``),
    because that is the call the question reports as returning no response.
    """

    name = "mygofer_d"
    allowed_domains = ["mygofer.com"]
    # Not referenced by start_requests() below, which builds its own request.
    start_urls = ['https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture']

    def start_requests(self):
        """Return a one-element list holding the POST request to the search endpoint."""
        # NOTE(review): an already JSON-encoded document is handed over as
        # ``formdata`` and its nulls are quoted as "null"; this is the
        # behaviour the question is asking about, so it is left untouched.
        # The continuation lines of the literal stay at column 0 so the string
        # content is byte-for-byte what the asker posted.
        return [FormRequest("https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search",
                            formdata='''{"filters":{},
"brandFilter":"null",
"sellersFilter":"null",
"catgroupId":"34790",
"levelOne":"null",
"searchMode":"BROWSE",
"sortBy":"RECOMMENDED",
"keyword":"south%20shore%20furniture",
"pageNum":"1",
"rowCount":"120",
"ffmMode":"ALL",
"priceFilter":"null",
"hideOOS":"true",
"uNo":"4848",
"session":{"guid":"0",
"emailId":"",
"sessionKey":"fcd3bcd1-b7bf-11e6-8e27-00505699251d",
"userId":"5970776",
"appId":"MYGOFER"},
"security":{"src":"web",
"ts":"2016-12-01T12:58:28.994Z",
"authToken":""}}''',
                            callback=self.parse)]

    # NOTE(review): per Scrapy's HttpErrorMiddleware documentation this lets
    # responses with HTTP status 415 reach the callback instead of being
    # filtered out -- confirm against the Scrapy version in use.
    handle_httpstatus_list = [415]

    def parse(self, response):
        """Print the response URL and dump the raw response body to mygofer.txt."""
        # Single-argument print() produces the same output on Python 2 and 3.
        print("+++++ %s" % response.url)
        with open("mygofer.txt", "wb") as v:
            v.write(response.body)
有几点需要注意:
- 您要发送的请求正文已经是 JSON 编码的,因此应使用 body 参数,而不是 formdata(后者用于把键/值对进行 URL 编码)
- 您需要指明 HTTP 请求正文的 Content-Type(我的 Chrome 浏览器发送的是 Content-Type: application/json;charset=UTF-8)
- 显然,该网站无法正确处理字符串 "null",请改用 JSON 的 null
示例 shell 会话:
$ scrapy shell -s USER_AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36" 'https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture'
(...)
>>> frq = scrapy.FormRequest("https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search",
... method="POST",
... body='''{"filters":{},
... "brandFilter":null,
... "sellersFilter":null,
... "catgroupId":"34790",
... "levelOne":null,
... "searchMode":"BROWSE",
... "sortBy":"RECOMMENDED",
... "keyword":"south%20shore%20furniture",
... "pageNum":"1",
... "rowCount":"120",
... "ffmMode":"ALL",
... "priceFilter":null,
... "hideOOS":"true",
... "uNo":"4848",
... "session":{"guid":"0",
... "emailId":"",
... "sessionKey":"fcd3bcd1-b7bf-11e6-8e27-00505699251d",
... "userId":"5970776",
... "appId":"MYGOFER"},
... "security":{"src":"web",
... "ts":"2016-12-01T12:58:28.994Z",
... "authToken":""}}''',
... headers={"Content-Type": "application/json;charset=UTF-8",
... "Accept":"application/json, text/plain, */*"})
>>> fetch(frq)
2016-12-08 15:50:26 [scrapy] DEBUG: Crawled (200) <POST https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search> (referer: None)
>>>
>>>
>>> import json
>>> data = json.loads(response.text)
>>> len(data)
3
>>> data.keys()
[u'classType', u'payload', u'userRole']
>>>
>>> from pprint import pprint
>>>
>>> pprint(data)
{u'classType': u'com.shc.ecom.local.search.beans.output.SearchOutput',
u'payload': {u'feature': {},
u'filters': {u'levelThree': [{u'catGpId': u'28371',
u'catGpPath': u'For the Home_Kids Room_Fun Accessories',
u'count': 1,
u'name': u'Fun Accessories',
u'parentLevel': u'Kids Room',
u'seoPath': u'for-the-home-kids-room-fun-accessories'},
...
{u'catGpId': u'1231474854',
u'catGpPath': u'TVs & Electronics_Media Furniture_TV Stands',
u'count': 69,
u'name': u'TV Stands',
u'parentLevel': u'Media Furniture',
u'seoPath': u'tvs-electronics-media-furniture-tv-stands'}],
u'narrowBy': [{u'count': 8,
u'name': u'Double Sided',
u'value': u'Yes'},
{u'count': 4,
u'name': u'Upholstered',
u'value': u'No'},
{u'count': 24,
u'name': u'Mobile',
u'value': u'Yes'},
{u'count': 24,
u'name': u'Fire Resistant',
u'value': u'No'}],
u'otherFilters': {u'Assembly': {u'Assembled': 2,
u'Ready to assemble': 770},
u'Audience': {u'Adult': 262,
u'All ages': 7,
u'Dorm/College': 2,
u'Kids': 351,
u'Teen': 12},
...
u'Width Range (in.)': {u'12 - 24 in.': 8,
u'25 - 36 in.': 106,
u'37 - 48 in.': 32,
u'49 - 60 in.': 70,
u'61 - 72 in.': 4,
u'Less than 12 in.': 2}},
u'priceRanges': [{u'cnt': u'262',
u'high': u'100',
u'low': u'0'},
{u'cnt': u'269',
u'high': u'150',
u'low': u'100'},
{u'cnt': u'251',
u'high': u'200',
u'low': u'150'},
{u'cnt': u'219',
u'high': u'275',
u'low': u'200'},
{u'cnt': u'94',
u'high': u'above',
u'low': u'275'}]},
u'keyword': u'south%20shore%20furniture',
u'levelOne': {u'catGpId': u'34790',
u'catGpPath': u'Furniture',
u'name': u'Furniture',
u'seoPath': u'furniture'},
u'maxPrice': u'2539.19',
u'minPrice': u'12.65',
u'numFound': u'1095',
u'products': [{u'availFFMs': [u'SHIP'],
u'brand': u'South Shore',
u'ffm': u'VD',
u'freeShip': u'0',
u'img': u'http://c.shld.net/rpx/i/s/pi/mp/20571/prod_6578221517?src=http%3A%2F%2Fak1.ostkcdn.com%2Fimages%2Fproducts%2F9810550%2FSouth-Shore-Willow-Twin-Bookcase-Headboard-39-Sumptuous-Cherry-0da3d88a-cb6a-4048-80d4-be464e85da49.jpg&d=8d8fee1e07dc750e2fb7c5711a500bf32278595c',
u'isInCart': False,
u'itemPartNumber': u'SPM9120228717',
u'mailable': u'1',
u'mfpartno': u'3356098-9810550',
u'name': u'South Shore Willow Twin Bookcase Headboard Sumptuous Cherry',
u'partNumber': u'SPM9120228717',
u'prdType': u'NONVARIATION',
u'price': {u'mapViolation': False,
u'pid': u'SPM9120228717'},
u'qtyInCart': 0,
u'rating': 0.0,
u'reviews': 0,
u'salePrice': 87.11,
u'shipStock': u'1',
u'soldBy': u'Overstock.com',
u'solrSalePrice': 87.11,
u'storePrice': False,
u'type': u'NONVARIATION'},
...
{u'availFFMs': [u'SHIP'],
u'brand': u'South Shore',
u'ffm': u'VD',
u'freeShip': u'1',
u'img': u'http://c.shld.net/rpx/i/s/i/spin/image/spin_prod_204451401',
u'isInCart': False,
u'itemPartNumber': u'00827455000',
u'mailable': u'1',
u'mfpartno': u'7250767',
u'name': u'Axess Collection 4-Shelf Bookcasen Pure White',
u'partNumber': u'00827455000P',
u'prdType': u'NONVARIATION',
u'price': {u'clearancePrice': u'0.00',
u'mapViolation': False,
u'pid': u'00827455000',
u'priceType': u'P',
u'promoPrice': u'67.49',
u'regularPrice': u'74.99',
u'salePrice': u'67.49',
u'savings': u'7.5'},
u'qtyInCart': 0,
u'rating': 0.0,
u'reviews': 0,
u'salePrice': 67.49,
u'shipStock': u'1',
u'soldBy': u'Sears',
u'solrSalePrice': 59.71,
u'storePrice': False,
u'type': u'NONVARIATION'}],
u'query': u'http://solrx416p.prod.ch4.s.com:8380/search/select?qt=simpleallsubcat&q=south%20shore%20furniture&wt=json&start=0&rows=120&fq=catalogs:("27151")&fq=level1Cats:("27151_Furniture")&fq=storeAttributes:(!"10175_OUTOFSTOCK_INDICATOR=1")&fq=!(storeAttributes:("10175_DEFAULT_FULFILLMENT=DDC" OR "10175_DEFAULT_FULFILLMENT=KRES" OR "10175_DEFAULT_FULFILLMENT=CRES" OR "10175_DEFAULT_FULFILLMENT=DRES" OR "10175_DEFAULT_FULFILLMENT=SRES" OR "10175_DEFAULT_FULFILLMENT=PLSFS"))&sort=instock desc,fulfillment desc,imageStatus desc,score desc&clientID=MyGofer&sortPrefix=4848~10175&globalPrefix=4848,10175',
u'relevancyRedirect': False,
u'status': u'success',
u'twItems': []},
u'userRole': None}
>>>
虽然已经过去 5 年了,但现在可以使用 scrapy.http.JsonRequest 来发送 JSON 请求体——以下是文档中的示例:
# Example payload that is handed to JsonRequest as its JSON body.
data = {
'name1': 'value1',
'name2': 'value2',
}
# JsonRequest here is scrapy.http.JsonRequest. Because of the `yield`, this
# line has to live inside a generator function (e.g. a spider method).
yield JsonRequest(url='http://www.example.com/post/action', data=data)