如何在 scrapy 中执行 POST 方法?

how to perform POST method in scrapy?

请问有人能告诉我如何对下面的 URL 发起 POST 请求吗?

https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture

1) 加载上面的 URL 后,可以看到它使用下面的 POST URL 和 formdata:

Post Url = https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search

表单数据 = {"filters":{},"brandFilter":null,"sellersFilter":null,"catgroupId":"34790","levelOne":null,"searchMode":"BROWSE","sortBy":"RECOMMENDED","keyword":"south%20shore%20furniture","pageNum":1,"rowCount":120,"ffmMode":"ALL","priceFilter":null,"hideOOS":true,"uNo":"4848","session":{"guid":0,"emailId":"","sessionKey":"da9d76bd-bd4e-11e6-8e27-00505699251d","userId":6026228,"appId":"MYGOFER"},"security":{"src":"web","ts":"2016-12-08T14:01:57.619Z","authToken":""}}

2) 我已经在 FormRequest 中传递了 post url 和表单数据,但我没有得到任何回应。

import scrapy
from scrapy.http import FormRequest


class MygoferDSpider(scrapy.Spider):
    """Spider that POSTs a search query to the mygofer.com search API.

    ``start_requests`` issues a single ``FormRequest`` to the search endpoint
    and ``parse`` prints the response URL and dumps the raw response body to
    ``mygofer.txt``.

    NOTE(review): the request is built with ``formdata=`` holding a
    JSON-encoded *string*. ``formdata`` is meant for key/value pairs that get
    URL-encoded; a payload that is already JSON should be sent through
    ``body=`` together with a ``Content-Type: application/json`` header.
    """
    name = "mygofer_d"
    allowed_domains = ["mygofer.com"]
    # The listing page whose search this spider reproduces. It is not read by
    # the start_requests override below, which builds its own request.
    start_urls = ['https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture']


    def start_requests(self):
        """Return a one-element list with the POST request to the search API.

        The response is routed to ``self.parse``.
        """
        # NOTE(review): the payload below quotes "null", "true" and the numeric
        # fields as strings, whereas the payload captured from the browser uses
        # bare JSON null / true / numbers for the same keys.
        return[FormRequest("https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search",
                       formdata = '''{"filters":{},
                                   "brandFilter":"null",
                                   "sellersFilter":"null",
                                   "catgroupId":"34790",
                                   "levelOne":"null",
                                   "searchMode":"BROWSE",
                                   "sortBy":"RECOMMENDED",
                                   "keyword":"south%20shore%20furniture",
                                   "pageNum":"1",
                                   "rowCount":"120",
                                   "ffmMode":"ALL",
                                   "priceFilter":"null",
                                   "hideOOS":"true",
                                   "uNo":"4848",
                                   "session":{"guid":"0",
                                              "emailId":"",
                                              "sessionKey":"fcd3bcd1-b7bf-11e6-8e27-00505699251d",
                                              "userId":"5970776",
                                              "appId":"MYGOFER"},
                                   "security":{"src":"web",
                                               "ts":"2016-12-01T12:58:28.994Z",
                                               "authToken":""}}''',
                       callback=self.parse)]

    # Scrapy spider attribute: responses with HTTP status 415 (Unsupported
    # Media Type) are passed on to the callback instead of being filtered out.
    handle_httpstatus_list = [415]
    def parse(self, response):
        """Print the response URL and write the raw body to ``mygofer.txt``."""
        # Python 2 print statement; this snippet is not valid Python 3 as-is.
        print "+++++", response.url
        # Binary mode: response.body is written out unchanged, overwriting any
        # existing mygofer.txt.
        with open("mygofer.txt","wb") as v:
            v.write(response.body)

几件事:

  • 您要发送的正文已经是 JSON 编码的,因此应使用 body 参数,而不是 formdata(formdata 用于将键/值对进行 URL 编码)
  • 您需要指明 HTTP 请求正文的 Content-Type(我的 Chrome 浏览器发送的是 Content-Type: application/json;charset=UTF-8)
  • 显然,该网站无法正确处理字符串 "null",请改用 JSON 的 null 值

示例 shell 会话:

$ scrapy shell -s USER_AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36" 'https://www.mygofer.com/furniture/b-34790/rowCount_120?keyword=south%20shore%20furniture'
(...)
>>> frq = scrapy.FormRequest("https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search",
...     method="POST",
...     body='''{"filters":{},
... "brandFilter":null,
... "sellersFilter":null,
... "catgroupId":"34790",
... "levelOne":null,
... "searchMode":"BROWSE",
... "sortBy":"RECOMMENDED",
... "keyword":"south%20shore%20furniture",
... "pageNum":"1",
... "rowCount":"120",
... "ffmMode":"ALL",
... "priceFilter":null,
... "hideOOS":"true",
... "uNo":"4848",
... "session":{"guid":"0",
...           "emailId":"",
...           "sessionKey":"fcd3bcd1-b7bf-11e6-8e27-00505699251d",
...           "userId":"5970776",
...           "appId":"MYGOFER"},
... "security":{"src":"web",
...            "ts":"2016-12-01T12:58:28.994Z",
...            "authToken":""}}''',
...     headers={"Content-Type": "application/json;charset=UTF-8",
...     "Accept":"application/json, text/plain, */*"})
>>> fetch(frq)
2016-12-08 15:50:26 [scrapy] DEBUG: Crawled (200) <POST https://www.mygofer.com/lps-mygofer/api/v1/mygofer/search> (referer: None)
>>> 
>>>
>>> import json
>>> data = json.loads(response.text)
>>> len(data)
3
>>> data.keys()
[u'classType', u'payload', u'userRole']
>>> 
>>> from pprint import pprint
>>> 
>>> pprint(data)
{u'classType': u'com.shc.ecom.local.search.beans.output.SearchOutput',
 u'payload': {u'feature': {},
              u'filters': {u'levelThree': [{u'catGpId': u'28371',
                                            u'catGpPath': u'For the Home_Kids Room_Fun Accessories',
                                            u'count': 1,
                                            u'name': u'Fun Accessories',
                                            u'parentLevel': u'Kids Room',
                                            u'seoPath': u'for-the-home-kids-room-fun-accessories'},
...
                                           {u'catGpId': u'1231474854',
                                            u'catGpPath': u'TVs & Electronics_Media Furniture_TV Stands',
                                            u'count': 69,
                                            u'name': u'TV Stands',
                                            u'parentLevel': u'Media Furniture',
                                            u'seoPath': u'tvs-electronics-media-furniture-tv-stands'}],
                           u'narrowBy': [{u'count': 8,
                                          u'name': u'Double Sided',
                                          u'value': u'Yes'},
                                         {u'count': 4,
                                          u'name': u'Upholstered',
                                          u'value': u'No'},
                                         {u'count': 24,
                                          u'name': u'Mobile',
                                          u'value': u'Yes'},
                                         {u'count': 24,
                                          u'name': u'Fire Resistant',
                                          u'value': u'No'}],
                           u'otherFilters': {u'Assembly': {u'Assembled': 2,
                                                           u'Ready to assemble': 770},
                                             u'Audience': {u'Adult': 262,
                                                           u'All ages': 7,
                                                           u'Dorm/College': 2,
                                                           u'Kids': 351,
                                                           u'Teen': 12},
...
                                             u'Width Range (in.)': {u'12 - 24 in.': 8,
                                                                    u'25 - 36 in.': 106,
                                                                    u'37 - 48 in.': 32,
                                                                    u'49 - 60 in.': 70,
                                                                    u'61 - 72 in.': 4,
                                                                    u'Less than 12 in.': 2}},
                           u'priceRanges': [{u'cnt': u'262',
                                             u'high': u'100',
                                             u'low': u'0'},
                                            {u'cnt': u'269',
                                             u'high': u'150',
                                             u'low': u'100'},
                                            {u'cnt': u'251',
                                             u'high': u'200',
                                             u'low': u'150'},
                                            {u'cnt': u'219',
                                             u'high': u'275',
                                             u'low': u'200'},
                                            {u'cnt': u'94',
                                             u'high': u'above',
                                             u'low': u'275'}]},
              u'keyword': u'south%20shore%20furniture',
              u'levelOne': {u'catGpId': u'34790',
                            u'catGpPath': u'Furniture',
                            u'name': u'Furniture',
                            u'seoPath': u'furniture'},
              u'maxPrice': u'2539.19',
              u'minPrice': u'12.65',
              u'numFound': u'1095',
              u'products': [{u'availFFMs': [u'SHIP'],
                             u'brand': u'South Shore',
                             u'ffm': u'VD',
                             u'freeShip': u'0',
                             u'img': u'http://c.shld.net/rpx/i/s/pi/mp/20571/prod_6578221517?src=http%3A%2F%2Fak1.ostkcdn.com%2Fimages%2Fproducts%2F9810550%2FSouth-Shore-Willow-Twin-Bookcase-Headboard-39-Sumptuous-Cherry-0da3d88a-cb6a-4048-80d4-be464e85da49.jpg&d=8d8fee1e07dc750e2fb7c5711a500bf32278595c',
                             u'isInCart': False,
                             u'itemPartNumber': u'SPM9120228717',
                             u'mailable': u'1',
                             u'mfpartno': u'3356098-9810550',
                             u'name': u'South Shore Willow Twin Bookcase Headboard  Sumptuous Cherry',
                             u'partNumber': u'SPM9120228717',
                             u'prdType': u'NONVARIATION',
                             u'price': {u'mapViolation': False,
                                        u'pid': u'SPM9120228717'},
                             u'qtyInCart': 0,
                             u'rating': 0.0,
                             u'reviews': 0,
                             u'salePrice': 87.11,
                             u'shipStock': u'1',
                             u'soldBy': u'Overstock.com',
                             u'solrSalePrice': 87.11,
                             u'storePrice': False,
                             u'type': u'NONVARIATION'},
...
                            {u'availFFMs': [u'SHIP'],
                             u'brand': u'South Shore',
                             u'ffm': u'VD',
                             u'freeShip': u'1',
                             u'img': u'http://c.shld.net/rpx/i/s/i/spin/image/spin_prod_204451401',
                             u'isInCart': False,
                             u'itemPartNumber': u'00827455000',
                             u'mailable': u'1',
                             u'mfpartno': u'7250767',
                             u'name': u'Axess Collection 4-Shelf Bookcasen Pure White',
                             u'partNumber': u'00827455000P',
                             u'prdType': u'NONVARIATION',
                             u'price': {u'clearancePrice': u'0.00',
                                        u'mapViolation': False,
                                        u'pid': u'00827455000',
                                        u'priceType': u'P',
                                        u'promoPrice': u'67.49',
                                        u'regularPrice': u'74.99',
                                        u'salePrice': u'67.49',
                                        u'savings': u'7.5'},
                             u'qtyInCart': 0,
                             u'rating': 0.0,
                             u'reviews': 0,
                             u'salePrice': 67.49,
                             u'shipStock': u'1',
                             u'soldBy': u'Sears',
                             u'solrSalePrice': 59.71,
                             u'storePrice': False,
                             u'type': u'NONVARIATION'}],
              u'query': u'http://solrx416p.prod.ch4.s.com:8380/search/select?qt=simpleallsubcat&q=south%20shore%20furniture&wt=json&start=0&rows=120&fq=catalogs:("27151")&fq=level1Cats:("27151_Furniture")&fq=storeAttributes:(!"10175_OUTOFSTOCK_INDICATOR=1")&fq=!(storeAttributes:("10175_DEFAULT_FULFILLMENT=DDC" OR "10175_DEFAULT_FULFILLMENT=KRES" OR "10175_DEFAULT_FULFILLMENT=CRES" OR "10175_DEFAULT_FULFILLMENT=DRES" OR "10175_DEFAULT_FULFILLMENT=SRES" OR "10175_DEFAULT_FULFILLMENT=PLSFS"))&sort=instock desc,fulfillment desc,imageStatus desc,score desc&clientID=MyGofer&sortPrefix=4848~10175&globalPrefix=4848,10175',
              u'relevancyRedirect': False,
              u'status': u'success',
              u'twItems': []},
 u'userRole': None}
>>> 

这是 5 年后的事了,但您可以使用 scrapy.http.JsonRequest 来处理 JSON 有效载荷 - 文档中的示例:

# Payload as a plain Python dict; it is handed to JsonRequest through `data=`.
data = {
'name1': 'value1',
'name2': 'value2',
}

# NOTE(review): `yield` is only valid inside a function body — presumably this
# line belongs in a spider callback or start_requests; JsonRequest also has to
# be imported (scrapy.http.JsonRequest) for the snippet to run.
yield JsonRequest(url='http://www.example.com/post/action', data=data)