Json 使用 Scrapy 屈服时出现响应错误

Json response error when yielding with Scrapy

我正在尝试从网页的响应中生成一些数据,以便我知道我已经正确地实现了代码。不幸的是,情况并非如此,因为我收到以下错误:

json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

我知道当我尝试从数据中获取响应时,它会出现在函数 parse 中。但是,我不明白为什么它不起作用。

这是我的脚本:

import scrapy
from scrapy_splash import SplashFormRequest


headers = {
    'authority': 'www.etsy.com',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
    'x-csrf-token': '3:1641383062:Exn8HMFDcc0UtitU6NOM3o3x8BGB:864dc90d926383d90686f37be56f69685b939f0f306b10a99bcd9016209f15d4',
    'sec-ch-ua-mobile': '?0',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'accept': '*/*',
    'x-requested-with': 'XMLHttpRequest',
    'x-page-guid': 'eeda48b359a.aa23cce28f31baac6f24.00',
    'x-detected-locale': 'GBP|en-GB|GB',
    'sec-ch-ua-platform': '"Linux"',
    'origin': 'https://www.etsy.com',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'cors',
    'sec-fetch-dest': 'empty',
    'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
    'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
    'cookie': 'uaid=G-_aWcvXqYHevnNO3ane9nOUmwNjZACCxCuVe2B0tVJpYmaKkpVSaVpUSoBZaGZVQL6Lj4mRv7ObrmmRR3F-aLyHp1ItAwA.; user_prefs=bNwL2wOEkWxqOSu2A1-CWlR6cr9jZACCxCuVe2B0tJK7U4CSTl5pTo6OUmqerruTko4SiACLGEEoXEQsAwA.; fve=1641314748.0; utm_lps=google__cpc; ua=531227642bc86f3b5fd7103a0c0b4fd6; p=eyJnZHByX3RwIjoxLCJnZHByX3AiOjF9; _gcl_au=1.1.1757627174.1641314793; _gid=GA1.2.1898390797.1641314793; __adal_cw=1641314793715; _pin_unauth=dWlkPVltVmtZemxoTldNdFpURXdPQzAwWkRWbUxXRTJOV1l0TTJGaE9URXdZVEEwTlRBeQ; last_browse_page=https%3A%2F%2Fwww.etsy.com%2Fuk%2F; __adal_ses=*; __adal_ca=so%3DGoogle%26me%3Dorganic%26ca%3D%28not%2520set%29%26co%3D%28not%2520set%29%26ke%3D%28not%2520set%29; search_options={"prev_search_term":"20s","item_language":null,"language_carousel":null}; _ga=GA1.2.559839679.1641314793; tsd=%7B%7D; __adal_id=952d43d7-5b80-4907-99d7-6f6baa9f4fe1.1641314794.3.1641383063.1641383059.2fe7a338-93bd-441f-b295-80549adbef7b; _tq_id.TV-27270909-1.a4d5=e2f6af8c27dee5e4.1641314794.0.1641383063..; _uetsid=dff577e06d7d11ec9617cbf4cc51b5b2; _uetvid=dff5f2706d7d11ec932fd3c5b816ab20; granify.uuid=bfd14e46-e8fa-4e7b-bce7-6f05dcb4b215; pla_spr=1; _ga_KR3J610VYM=GS1.1.1641383058.3.1.1641383118.60; exp_hangover=qk2fpkLi1lphuLsCKeq4gAe9BvxjZACCxCuVe8D01Zbb1UrlqUnxiUUlmWmZyZmJOfE5iSWpecmV8YUm8UYGhpZKVkqZeak5memZSTmpSrUMAA..; granify.session.QrsCf=-1',
}



class EtsySpider(scrapy.Spider):
    name = 'etit'
    start_urls = ['https://www.etsy.com/api/v3/ajax/bespoke/member/neu/specs/async_search_results']

    custom_settings = {
        'USER_AGENT':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }
    def start_requests(self):
        for url in self.start_urls:
            yield SplashFormRequest(
                url,
                method = "POST",
                formdata = {
                    'log_performance_metrics': 'true',
                    'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
                    'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-GB',
                    'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'GBP',
                    'specs[async_search_results][1][search_request_params][detected_locale][region]': 'GB',
                    'specs[async_search_results][1][search_request_params][locale][language]': 'en-GB',
                    'specs[async_search_results][1][search_request_params][locale][currency_code]': 'GBP',
                    'specs[async_search_results][1][search_request_params][locale][region]': 'GB',
                    'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
                    'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
                    'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
                    'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
                    'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
                    'specs[async_search_results][1][search_request_params][parameters][q]': '30s',
                    'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
                    'specs[async_search_results][1][search_request_params][parameters][locationQuery]': '2635167',
                    'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
                    'specs[async_search_results][1][search_request_params][parameters][page]': '4',
                    'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
                    'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
                    'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=30s&explicit=1locationQuery=2635167&ship_to=GB&page=3&ref=pagination',
                    'specs[async_search_results][1][search_request_params][user_id]': '',
                    'specs[async_search_results][1][request_type]': 'pagination_preact',
                    'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'false',
                    'view_data_event_name': 'search_async_pagination_specview_rendered'
                },
                headers=headers,
                callback = self.parse
            )
    
    def parse(self, response):
        stuff = response.json().get('cssFiles')
        yield {
            'stuff':stuff
        }

我已经尝试过请求,它有效:

import requests

headers = {
    'authority': 'www.etsy.com',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
    'x-csrf-token': '3:1641383062:Exn8HMFDcc0UtitU6NOM3o3x8BGB:864dc90d926383d90686f37be56f69685b939f0f306b10a99bcd9016209f15d4',
    'sec-ch-ua-mobile': '?0',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'accept': '*/*',
    'x-requested-with': 'XMLHttpRequest',
    'x-page-guid': 'eeda48b359a.aa23cce28f31baac6f24.00',
    'x-detected-locale': 'GBP|en-GB|GB',
    'sec-ch-ua-platform': '"Linux"',
    'origin': 'https://www.etsy.com',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'cors',
    'sec-fetch-dest': 'empty',
    'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
    'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
    'cookie': 'uaid=G-_aWcvXqYHevnNO3ane9nOUmwNjZACCxCuVe2B0tVJpYmaKkpVSaVpUSoBZaGZVQL6Lj4mRv7ObrmmRR3F-aLyHp1ItAwA.; user_prefs=bNwL2wOEkWxqOSu2A1-CWlR6cr9jZACCxCuVe2B0tJK7U4CSTl5pTo6OUmqerruTko4SiACLGEEoXEQsAwA.; fve=1641314748.0; utm_lps=google__cpc; ua=531227642bc86f3b5fd7103a0c0b4fd6; p=eyJnZHByX3RwIjoxLCJnZHByX3AiOjF9; _gcl_au=1.1.1757627174.1641314793; _gid=GA1.2.1898390797.1641314793; __adal_cw=1641314793715; _pin_unauth=dWlkPVltVmtZemxoTldNdFpURXdPQzAwWkRWbUxXRTJOV1l0TTJGaE9URXdZVEEwTlRBeQ; last_browse_page=https%3A%2F%2Fwww.etsy.com%2Fuk%2F; __adal_ses=*; __adal_ca=so%3DGoogle%26me%3Dorganic%26ca%3D%28not%2520set%29%26co%3D%28not%2520set%29%26ke%3D%28not%2520set%29; search_options={"prev_search_term":"20s","item_language":null,"language_carousel":null}; _ga=GA1.2.559839679.1641314793; tsd=%7B%7D; __adal_id=952d43d7-5b80-4907-99d7-6f6baa9f4fe1.1641314794.3.1641383063.1641383059.2fe7a338-93bd-441f-b295-80549adbef7b; _tq_id.TV-27270909-1.a4d5=e2f6af8c27dee5e4.1641314794.0.1641383063..; _uetsid=dff577e06d7d11ec9617cbf4cc51b5b2; _uetvid=dff5f2706d7d11ec932fd3c5b816ab20; granify.uuid=bfd14e46-e8fa-4e7b-bce7-6f05dcb4b215; pla_spr=1; _ga_KR3J610VYM=GS1.1.1641383058.3.1.1641383118.60; exp_hangover=qk2fpkLi1lphuLsCKeq4gAe9BvxjZACCxCuVe8D01Zbb1UrlqUnxiUUlmWmZyZmJOfE5iSWpecmV8YUm8UYGhpZKVkqZeak5memZSTmpSrUMAA..; granify.session.QrsCf=-1',
}

data = {
  'log_performance_metrics': 'true',
  'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
  'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-GB',
  'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'GBP',
  'specs[async_search_results][1][search_request_params][detected_locale][region]': 'GB',
  'specs[async_search_results][1][search_request_params][locale][language]': 'en-GB',
  'specs[async_search_results][1][search_request_params][locale][currency_code]': 'GBP',
  'specs[async_search_results][1][search_request_params][locale][region]': 'GB',
  'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
  'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
  'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
  'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
  'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
  'specs[async_search_results][1][search_request_params][parameters][q]': '20s',
  'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
  'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
  'specs[async_search_results][1][search_request_params][parameters][page]': '2',
  'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
  'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
  'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB',
  'specs[async_search_results][1][search_request_params][user_id]': '',
  'specs[async_search_results][1][request_type]': 'pagination_preact',
  'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'false',
  'view_data_event_name': 'search_async_pagination_specview_rendered'
}

requests.post('https://www.etsy.com/api/v3/ajax/bespoke/member/neu/specs/async_search_results', headers=headers, data=data)
#<Response [200]>

我们需要使用 cookies 来获取所需的数据,而不是在 headers 中使用它,我们需要将它们移动到 cookies

    def start_requests(self):

        headers = {
            'authority': 'www.etsy.com',
            'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
            'x-csrf-token': '3:1641390466:3d9EJ5Y1lwN6z_d3nn2qROS-IK6z:476df27e75d2b310bb79d565bbb3fa66b6c6d1ec26c137e6b98a8265a8447b4c',
            'sec-ch-ua-mobile': '?0',
            'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
            'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'accept': '*/*',
            'x-requested-with': 'XMLHttpRequest',
            'x-page-guid': 'eeda8f50e2a.c5a8a0ae59e2ab4a8635.00',
            'x-detected-locale': 'USD|en-US|UA',
            'sec-ch-ua-platform': '"Windows"',
            'origin': 'https://www.etsy.com',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-mode': 'cors',
            'sec-fetch-dest': 'empty',
            'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=3&ref=pagination',
            'accept-language': 'en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7,uk;q=0.6,en-GB;q=0.5',
            # 'cookie': 'user_prefs=2sjEL59UUglDjNIW6TKc04MvLTVjZACCxJMbvsPoaKXQYBclnbzSnBwdpdQ83dBgJR2lUEeoiBGEwkXEMgAA; fve=1640607991.0; ua=531227642bc86f3b5fd7103a0c0b4fd6; _gcl_au=1.1.717562651.1640607992; uaid=E7bYwrWVwTy7YGe_b_ipYT3Avd9jZACCxJMbvoPpqwvzqpVKEzNTlKyUnLJ9Io3DTQt1k53MwiojXTLzvZPCS31yCoPC_JRqGQA.; pla_spr=0; _gid=GA1.2.1425785976.1641390447; _dc_gtm_UA-2409779-1=1; _pin_unauth=dWlkPU0yVTRaamxoTWpjdFlqTTVZUzAwT0RJeExXRmpNamt0WlROalpXTTVNREE0WkRVNQ; _ga=GA1.1.1730759327.1640607993; _uetsid=052ece906e2e11ecb56a0390ed629376; _uetvid=39de7550671011ec80d2dbfaa05c901b; exp_hangover=pB4zSokzfzMIT9Jzi7zIwmXybCJjZACCxJMbvoPpqwt7qpXKU5PiE4tKMtMykzMTc-JzEktS85Ir4wtN4o0MDC2VrJQy81JzMtMzk3JSlWoZAA..; _ga_KR3J610VYM=GS1.1.1641390446.2.1.1641390474.32',
        }

        data = {
          'log_performance_metrics': 'true',
          'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
          'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-US',
          'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'USD',
          'specs[async_search_results][1][search_request_params][detected_locale][region]': 'UA',
          'specs[async_search_results][1][search_request_params][locale][language]': 'en-US',
          'specs[async_search_results][1][search_request_params][locale][currency_code]': 'USD',
          'specs[async_search_results][1][search_request_params][locale][region]': 'UA',
          'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
          'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
          'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
          'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
          'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
          'specs[async_search_results][1][search_request_params][parameters][q]': '20s',
          'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
          'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
          'specs[async_search_results][1][search_request_params][parameters][page]': '3',
          'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
          'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
          'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
          'specs[async_search_results][1][search_request_params][user_id]': '',
          'specs[async_search_results][1][request_type]': 'pagination_preact',
          'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'true',
          'view_data_event_name': 'search_async_pagination_specview_rendered'
        }

        cookies = {
            "user_prefs": "2sjEL59UUglDjNIW6TKc04MvLTVjZACCxJMbvsPoaKXQYBclnbzSnBwdpdQ83dBgJR2lUEeoiBGEwkXEMgAA",
            "fve": "1640607991.0",
            "ua": "531227642bc86f3b5fd7103a0c0b4fd6",
            "_gcl_au": "1.1.717562651.1640607992",
            "uaid": "E7bYwrWVwTy7YGe_b_ipYT3Avd9jZACCxJMbvoPpqwvzqpVKEzNTlKyUnLJ9Io3DTQt1k53MwiojXTLzvZPCS31yCoPC_JRqGQA.",
            "pla_spr": "0",
            "_gid": "GA1.2.1425785976.1641390447",
            "_dc_gtm_UA-2409779-1": "1",
            "_pin_unauth": "dWlkPU0yVTRaamxoTWpjdFlqTTVZUzAwT0RJeExXRmpNamt0WlROalpXTTVNREE0WkRVNQ",
            "_ga": "GA1.1.1730759327.1640607993",
            "_uetsid": "052ece906e2e11ecb56a0390ed629376",
            "_uetvid": "39de7550671011ec80d2dbfaa05c901b",
            "exp_hangover": "pB4zSokzfzMIT9Jzi7zIwmXybCJjZACCxJMbvoPpqwt7qpXKU5PiE4tKMtMykzMTc-JzEktS85Ir4wtN4o0MDC2VrJQy81JzMtMzk3JSlWoZAA..",
            "_ga_KR3J610VYM": "GS1.1.1641390446.2.1.1641390474.32"

        }

        for url in self.start_urls:
            yield scrapy.FormRequest(
                'https://www.etsy.com/api/v3/ajax/bespoke/member/neu/specs/async_search_results',
                headers=headers,
                cookies=cookies,
                method="POST",
                formdata=data,
                callback = self.parse_res
            )