使用 Scrapy 进行 yield 时出现 JSON 响应错误
Json response error when yielding with Scrapy
我正在尝试从网页的响应中 yield(产出)一些数据,以确认我的代码实现是否正确。不幸的是,代码并不正确,因为我收到以下错误:
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
我知道这个错误出现在函数 parse 中,也就是我尝试把响应解析为 JSON 的时候。
但是,我不明白为什么它不起作用。
这是我的脚本:
import scrapy
from scrapy_splash import SplashFormRequest
# Request headers for the Etsy async search endpoint; EtsySpider.start_requests
# passes this dict as `headers=`.
# NOTE(review): the 'x-csrf-token' and 'cookie' values look like they were
# captured from a single browser session — presumably they expire; confirm
# before reusing them.
headers = {
'authority': 'www.etsy.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
'x-csrf-token': '3:1641383062:Exn8HMFDcc0UtitU6NOM3o3x8BGB:864dc90d926383d90686f37be56f69685b939f0f306b10a99bcd9016209f15d4',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': '*/*',
'x-requested-with': 'XMLHttpRequest',
'x-page-guid': 'eeda48b359a.aa23cce28f31baac6f24.00',
'x-detected-locale': 'GBP|en-GB|GB',
'sec-ch-ua-platform': '"Linux"',
'origin': 'https://www.etsy.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
# The session cookies are sent here as one raw 'cookie' header string.
'cookie': 'uaid=G-_aWcvXqYHevnNO3ane9nOUmwNjZACCxCuVe2B0tVJpYmaKkpVSaVpUSoBZaGZVQL6Lj4mRv7ObrmmRR3F-aLyHp1ItAwA.; user_prefs=bNwL2wOEkWxqOSu2A1-CWlR6cr9jZACCxCuVe2B0tJK7U4CSTl5pTo6OUmqerruTko4SiACLGEEoXEQsAwA.; fve=1641314748.0; utm_lps=google__cpc; ua=531227642bc86f3b5fd7103a0c0b4fd6; p=eyJnZHByX3RwIjoxLCJnZHByX3AiOjF9; _gcl_au=1.1.1757627174.1641314793; _gid=GA1.2.1898390797.1641314793; __adal_cw=1641314793715; _pin_unauth=dWlkPVltVmtZemxoTldNdFpURXdPQzAwWkRWbUxXRTJOV1l0TTJGaE9URXdZVEEwTlRBeQ; last_browse_page=https%3A%2F%2Fwww.etsy.com%2Fuk%2F; __adal_ses=*; __adal_ca=so%3DGoogle%26me%3Dorganic%26ca%3D%28not%2520set%29%26co%3D%28not%2520set%29%26ke%3D%28not%2520set%29; search_options={"prev_search_term":"20s","item_language":null,"language_carousel":null}; _ga=GA1.2.559839679.1641314793; tsd=%7B%7D; __adal_id=952d43d7-5b80-4907-99d7-6f6baa9f4fe1.1641314794.3.1641383063.1641383059.2fe7a338-93bd-441f-b295-80549adbef7b; _tq_id.TV-27270909-1.a4d5=e2f6af8c27dee5e4.1641314794.0.1641383063..; _uetsid=dff577e06d7d11ec9617cbf4cc51b5b2; _uetvid=dff5f2706d7d11ec932fd3c5b816ab20; granify.uuid=bfd14e46-e8fa-4e7b-bce7-6f05dcb4b215; pla_spr=1; _ga_KR3J610VYM=GS1.1.1641383058.3.1.1641383118.60; exp_hangover=qk2fpkLi1lphuLsCKeq4gAe9BvxjZACCxCuVe8D01Zbb1UrlqUnxiUUlmWmZyZmJOfE5iSWpecmV8YUm8UYGhpZKVkqZeak5memZSTmpSrUMAA..; granify.session.QrsCf=-1',
}
class EtsySpider(scrapy.Spider):
    """Spider that POSTs one search form to Etsy's async search-results endpoint.

    Every URL in ``start_urls`` receives the same form payload through a
    ``SplashFormRequest`` together with the module-level ``headers`` dict.
    The ``cssFiles`` entry of the JSON reply is emitted as a single item.
    """

    name = 'etit'
    start_urls = ['https://www.etsy.com/api/v3/ajax/bespoke/member/neu/specs/async_search_results']
    # NOTE(review): `headers` also carries its own 'user-agent' entry, which
    # differs from this USER_AGENT — confirm which one is meant to be sent.
    custom_settings = {
        'USER_AGENT':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }

    def start_requests(self):
        """Yield one POST form request per entry in ``start_urls``."""
        for endpoint in self.start_urls:
            # Built inside the loop so every request gets its own dict.
            payload = {
                'log_performance_metrics': 'true',
                'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
                'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-GB',
                'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'GBP',
                'specs[async_search_results][1][search_request_params][detected_locale][region]': 'GB',
                'specs[async_search_results][1][search_request_params][locale][language]': 'en-GB',
                'specs[async_search_results][1][search_request_params][locale][currency_code]': 'GBP',
                'specs[async_search_results][1][search_request_params][locale][region]': 'GB',
                'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
                'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
                'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
                'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
                'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
                'specs[async_search_results][1][search_request_params][parameters][q]': '30s',
                'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
                'specs[async_search_results][1][search_request_params][parameters][locationQuery]': '2635167',
                'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
                'specs[async_search_results][1][search_request_params][parameters][page]': '4',
                'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
                'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
                # NOTE(review): there is no '&' between 'explicit=1' and
                # 'locationQuery' in this URL — kept verbatim; confirm intent.
                'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=30s&explicit=1locationQuery=2635167&ship_to=GB&page=3&ref=pagination',
                'specs[async_search_results][1][search_request_params][user_id]': '',
                'specs[async_search_results][1][request_type]': 'pagination_preact',
                'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'false',
                'view_data_event_name': 'search_async_pagination_specview_rendered'
            }
            # The cookies travel inside `headers` as a raw 'cookie' string.
            yield SplashFormRequest(
                endpoint,
                method="POST",
                formdata=payload,
                headers=headers,
                callback=self.parse,
            )

    def parse(self, response):
        """Decode the reply as JSON and emit its ``cssFiles`` value."""
        decoded = response.json()
        yield {'stuff': decoded.get('cssFiles')}
我已经用 requests 库尝试过同样的请求,它可以正常工作:
import requests
# Headers sent with the plain `requests` call at the end of this script.
# NOTE(review): the 'x-csrf-token' and 'cookie' values look session-specific —
# presumably they expire; confirm before reusing them.
headers = {
'authority': 'www.etsy.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
'x-csrf-token': '3:1641383062:Exn8HMFDcc0UtitU6NOM3o3x8BGB:864dc90d926383d90686f37be56f69685b939f0f306b10a99bcd9016209f15d4',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': '*/*',
'x-requested-with': 'XMLHttpRequest',
'x-page-guid': 'eeda48b359a.aa23cce28f31baac6f24.00',
'x-detected-locale': 'GBP|en-GB|GB',
'sec-ch-ua-platform': '"Linux"',
'origin': 'https://www.etsy.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
'cookie': 'uaid=G-_aWcvXqYHevnNO3ane9nOUmwNjZACCxCuVe2B0tVJpYmaKkpVSaVpUSoBZaGZVQL6Lj4mRv7ObrmmRR3F-aLyHp1ItAwA.; user_prefs=bNwL2wOEkWxqOSu2A1-CWlR6cr9jZACCxCuVe2B0tJK7U4CSTl5pTo6OUmqerruTko4SiACLGEEoXEQsAwA.; fve=1641314748.0; utm_lps=google__cpc; ua=531227642bc86f3b5fd7103a0c0b4fd6; p=eyJnZHByX3RwIjoxLCJnZHByX3AiOjF9; _gcl_au=1.1.1757627174.1641314793; _gid=GA1.2.1898390797.1641314793; __adal_cw=1641314793715; _pin_unauth=dWlkPVltVmtZemxoTldNdFpURXdPQzAwWkRWbUxXRTJOV1l0TTJGaE9URXdZVEEwTlRBeQ; last_browse_page=https%3A%2F%2Fwww.etsy.com%2Fuk%2F; __adal_ses=*; __adal_ca=so%3DGoogle%26me%3Dorganic%26ca%3D%28not%2520set%29%26co%3D%28not%2520set%29%26ke%3D%28not%2520set%29; search_options={"prev_search_term":"20s","item_language":null,"language_carousel":null}; _ga=GA1.2.559839679.1641314793; tsd=%7B%7D; __adal_id=952d43d7-5b80-4907-99d7-6f6baa9f4fe1.1641314794.3.1641383063.1641383059.2fe7a338-93bd-441f-b295-80549adbef7b; _tq_id.TV-27270909-1.a4d5=e2f6af8c27dee5e4.1641314794.0.1641383063..; _uetsid=dff577e06d7d11ec9617cbf4cc51b5b2; _uetvid=dff5f2706d7d11ec932fd3c5b816ab20; granify.uuid=bfd14e46-e8fa-4e7b-bce7-6f05dcb4b215; pla_spr=1; _ga_KR3J610VYM=GS1.1.1641383058.3.1.1641383118.60; exp_hangover=qk2fpkLi1lphuLsCKeq4gAe9BvxjZACCxCuVe8D01Zbb1UrlqUnxiUUlmWmZyZmJOfE5iSWpecmV8YUm8UYGhpZKVkqZeak5memZSTmpSrUMAA..; granify.session.QrsCf=-1',
}
# Form fields passed to requests.post as `data=`.
data = {
'log_performance_metrics': 'true',
'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-GB',
'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'GBP',
'specs[async_search_results][1][search_request_params][detected_locale][region]': 'GB',
'specs[async_search_results][1][search_request_params][locale][language]': 'en-GB',
'specs[async_search_results][1][search_request_params][locale][currency_code]': 'GBP',
'specs[async_search_results][1][search_request_params][locale][region]': 'GB',
'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
'specs[async_search_results][1][search_request_params][parameters][q]': '20s',
'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
'specs[async_search_results][1][search_request_params][parameters][page]': '2',
'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB',
'specs[async_search_results][1][search_request_params][user_id]': '',
'specs[async_search_results][1][request_type]': 'pagination_preact',
'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'false',
'view_data_event_name': 'search_async_pagination_specview_rendered'
}
# POST the form to the endpoint; the return value is not stored.
requests.post('https://www.etsy.com/api/v3/ajax/bespoke/member/neu/specs/async_search_results', headers=headers, data=data)
# Result reported by the author: <Response [200]>
我们需要带上 cookies 才能获取所需的数据,
但不要把它们放在 headers 的 'cookie' 字段中,而是要把它们移到请求的 cookies 参数里:
def start_requests(self):
    """Yield one POST form request for every URL in ``self.start_urls``.

    The session cookies are handed to Scrapy through the ``cookies=``
    argument of ``scrapy.FormRequest`` rather than through a raw
    ``cookie`` entry in ``headers``.

    Yields:
        scrapy.FormRequest: a POST request whose response is routed to
        ``self.parse_res``.
    """
    # NOTE(review): the 'x-csrf-token' below and the cookie values look
    # session-specific — presumably they expire; confirm before reusing.
    headers = {
        'authority': 'www.etsy.com',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
        'x-csrf-token': '3:1641390466:3d9EJ5Y1lwN6z_d3nn2qROS-IK6z:476df27e75d2b310bb79d565bbb3fa66b6c6d1ec26c137e6b98a8265a8447b4c',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'accept': '*/*',
        'x-requested-with': 'XMLHttpRequest',
        'x-page-guid': 'eeda8f50e2a.c5a8a0ae59e2ab4a8635.00',
        'x-detected-locale': 'USD|en-US|UA',
        'sec-ch-ua-platform': '"Windows"',
        'origin': 'https://www.etsy.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=3&ref=pagination',
        'accept-language': 'en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7,uk;q=0.6,en-GB;q=0.5',
        # Deliberately no 'cookie' entry: the cookies go in `cookies` below.
    }
    data = {
        'log_performance_metrics': 'true',
        'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
        'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-US',
        'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'USD',
        'specs[async_search_results][1][search_request_params][detected_locale][region]': 'UA',
        'specs[async_search_results][1][search_request_params][locale][language]': 'en-US',
        'specs[async_search_results][1][search_request_params][locale][currency_code]': 'USD',
        'specs[async_search_results][1][search_request_params][locale][region]': 'UA',
        'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
        'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
        'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
        'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
        'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
        'specs[async_search_results][1][search_request_params][parameters][q]': '20s',
        'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
        'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
        'specs[async_search_results][1][search_request_params][parameters][page]': '3',
        'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
        'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
        'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
        'specs[async_search_results][1][search_request_params][user_id]': '',
        'specs[async_search_results][1][request_type]': 'pagination_preact',
        'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'true',
        'view_data_event_name': 'search_async_pagination_specview_rendered',
    }
    cookies = {
        "user_prefs": "2sjEL59UUglDjNIW6TKc04MvLTVjZACCxJMbvsPoaKXQYBclnbzSnBwdpdQ83dBgJR2lUEeoiBGEwkXEMgAA",
        "fve": "1640607991.0",
        "ua": "531227642bc86f3b5fd7103a0c0b4fd6",
        "_gcl_au": "1.1.717562651.1640607992",
        "uaid": "E7bYwrWVwTy7YGe_b_ipYT3Avd9jZACCxJMbvoPpqwvzqpVKEzNTlKyUnLJ9Io3DTQt1k53MwiojXTLzvZPCS31yCoPC_JRqGQA.",
        "pla_spr": "0",
        "_gid": "GA1.2.1425785976.1641390447",
        "_dc_gtm_UA-2409779-1": "1",
        "_pin_unauth": "dWlkPU0yVTRaamxoTWpjdFlqTTVZUzAwT0RJeExXRmpNamt0WlROalpXTTVNREE0WkRVNQ",
        "_ga": "GA1.1.1730759327.1640607993",
        "_uetsid": "052ece906e2e11ecb56a0390ed629376",
        "_uetvid": "39de7550671011ec80d2dbfaa05c901b",
        "exp_hangover": "pB4zSokzfzMIT9Jzi7zIwmXybCJjZACCxJMbvoPpqwt7qpXKU5PiE4tKMtMykzMTc-JzEktS85Ir4wtN4o0MDC2VrJQy81JzMtMzk3JSlWoZAA..",
        "_ga_KR3J610VYM": "GS1.1.1641390446.2.1.1641390474.32",
    }
    for url in self.start_urls:
        # Use the loop's URL instead of repeating the endpoint as a literal,
        # so that `start_urls` is the single place where it is configured.
        yield scrapy.FormRequest(
            url,
            headers=headers,
            cookies=cookies,
            method="POST",
            formdata=data,
            # NOTE(review): `parse_res` is not defined in this snippet — it
            # must exist on the spider class; confirm.
            callback=self.parse_res,
        )
我正在尝试从网页的响应中 yield(产出)一些数据,以确认我的代码实现是否正确。不幸的是,代码并不正确,因为我收到以下错误:
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
我知道这个错误出现在函数 parse 中,也就是我尝试把响应解析为 JSON 的时候。
但是,我不明白为什么它不起作用。
这是我的脚本:
import scrapy
from scrapy_splash import SplashFormRequest
# Request headers for the Etsy async search endpoint; EtsySpider.start_requests
# passes this dict as `headers=`.
# NOTE(review): the 'x-csrf-token' and 'cookie' values look like they were
# captured from a single browser session — presumably they expire; confirm
# before reusing them.
headers = {
'authority': 'www.etsy.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
'x-csrf-token': '3:1641383062:Exn8HMFDcc0UtitU6NOM3o3x8BGB:864dc90d926383d90686f37be56f69685b939f0f306b10a99bcd9016209f15d4',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': '*/*',
'x-requested-with': 'XMLHttpRequest',
'x-page-guid': 'eeda48b359a.aa23cce28f31baac6f24.00',
'x-detected-locale': 'GBP|en-GB|GB',
'sec-ch-ua-platform': '"Linux"',
'origin': 'https://www.etsy.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
# The session cookies are sent here as one raw 'cookie' header string.
'cookie': 'uaid=G-_aWcvXqYHevnNO3ane9nOUmwNjZACCxCuVe2B0tVJpYmaKkpVSaVpUSoBZaGZVQL6Lj4mRv7ObrmmRR3F-aLyHp1ItAwA.; user_prefs=bNwL2wOEkWxqOSu2A1-CWlR6cr9jZACCxCuVe2B0tJK7U4CSTl5pTo6OUmqerruTko4SiACLGEEoXEQsAwA.; fve=1641314748.0; utm_lps=google__cpc; ua=531227642bc86f3b5fd7103a0c0b4fd6; p=eyJnZHByX3RwIjoxLCJnZHByX3AiOjF9; _gcl_au=1.1.1757627174.1641314793; _gid=GA1.2.1898390797.1641314793; __adal_cw=1641314793715; _pin_unauth=dWlkPVltVmtZemxoTldNdFpURXdPQzAwWkRWbUxXRTJOV1l0TTJGaE9URXdZVEEwTlRBeQ; last_browse_page=https%3A%2F%2Fwww.etsy.com%2Fuk%2F; __adal_ses=*; __adal_ca=so%3DGoogle%26me%3Dorganic%26ca%3D%28not%2520set%29%26co%3D%28not%2520set%29%26ke%3D%28not%2520set%29; search_options={"prev_search_term":"20s","item_language":null,"language_carousel":null}; _ga=GA1.2.559839679.1641314793; tsd=%7B%7D; __adal_id=952d43d7-5b80-4907-99d7-6f6baa9f4fe1.1641314794.3.1641383063.1641383059.2fe7a338-93bd-441f-b295-80549adbef7b; _tq_id.TV-27270909-1.a4d5=e2f6af8c27dee5e4.1641314794.0.1641383063..; _uetsid=dff577e06d7d11ec9617cbf4cc51b5b2; _uetvid=dff5f2706d7d11ec932fd3c5b816ab20; granify.uuid=bfd14e46-e8fa-4e7b-bce7-6f05dcb4b215; pla_spr=1; _ga_KR3J610VYM=GS1.1.1641383058.3.1.1641383118.60; exp_hangover=qk2fpkLi1lphuLsCKeq4gAe9BvxjZACCxCuVe8D01Zbb1UrlqUnxiUUlmWmZyZmJOfE5iSWpecmV8YUm8UYGhpZKVkqZeak5memZSTmpSrUMAA..; granify.session.QrsCf=-1',
}
class EtsySpider(scrapy.Spider):
    """Spider that POSTs one search form to Etsy's async search-results endpoint.

    Every URL in ``start_urls`` receives the same form payload through a
    ``SplashFormRequest`` together with the module-level ``headers`` dict.
    The ``cssFiles`` entry of the JSON reply is emitted as a single item.
    """

    name = 'etit'
    start_urls = ['https://www.etsy.com/api/v3/ajax/bespoke/member/neu/specs/async_search_results']
    # NOTE(review): `headers` also carries its own 'user-agent' entry, which
    # differs from this USER_AGENT — confirm which one is meant to be sent.
    custom_settings = {
        'USER_AGENT':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }

    def start_requests(self):
        """Yield one POST form request per entry in ``start_urls``."""
        for endpoint in self.start_urls:
            # Built inside the loop so every request gets its own dict.
            payload = {
                'log_performance_metrics': 'true',
                'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
                'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-GB',
                'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'GBP',
                'specs[async_search_results][1][search_request_params][detected_locale][region]': 'GB',
                'specs[async_search_results][1][search_request_params][locale][language]': 'en-GB',
                'specs[async_search_results][1][search_request_params][locale][currency_code]': 'GBP',
                'specs[async_search_results][1][search_request_params][locale][region]': 'GB',
                'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
                'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
                'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
                'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
                'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
                'specs[async_search_results][1][search_request_params][parameters][q]': '30s',
                'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
                'specs[async_search_results][1][search_request_params][parameters][locationQuery]': '2635167',
                'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
                'specs[async_search_results][1][search_request_params][parameters][page]': '4',
                'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
                'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
                # NOTE(review): there is no '&' between 'explicit=1' and
                # 'locationQuery' in this URL — kept verbatim; confirm intent.
                'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=30s&explicit=1locationQuery=2635167&ship_to=GB&page=3&ref=pagination',
                'specs[async_search_results][1][search_request_params][user_id]': '',
                'specs[async_search_results][1][request_type]': 'pagination_preact',
                'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'false',
                'view_data_event_name': 'search_async_pagination_specview_rendered'
            }
            # The cookies travel inside `headers` as a raw 'cookie' string.
            yield SplashFormRequest(
                endpoint,
                method="POST",
                formdata=payload,
                headers=headers,
                callback=self.parse,
            )

    def parse(self, response):
        """Decode the reply as JSON and emit its ``cssFiles`` value."""
        decoded = response.json()
        yield {'stuff': decoded.get('cssFiles')}
我已经用 requests 库尝试过同样的请求,它可以正常工作:
import requests
# Headers sent with the plain `requests` call at the end of this script.
# NOTE(review): the 'x-csrf-token' and 'cookie' values look session-specific —
# presumably they expire; confirm before reusing them.
headers = {
'authority': 'www.etsy.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
'x-csrf-token': '3:1641383062:Exn8HMFDcc0UtitU6NOM3o3x8BGB:864dc90d926383d90686f37be56f69685b939f0f306b10a99bcd9016209f15d4',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': '*/*',
'x-requested-with': 'XMLHttpRequest',
'x-page-guid': 'eeda48b359a.aa23cce28f31baac6f24.00',
'x-detected-locale': 'GBP|en-GB|GB',
'sec-ch-ua-platform': '"Linux"',
'origin': 'https://www.etsy.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
'cookie': 'uaid=G-_aWcvXqYHevnNO3ane9nOUmwNjZACCxCuVe2B0tVJpYmaKkpVSaVpUSoBZaGZVQL6Lj4mRv7ObrmmRR3F-aLyHp1ItAwA.; user_prefs=bNwL2wOEkWxqOSu2A1-CWlR6cr9jZACCxCuVe2B0tJK7U4CSTl5pTo6OUmqerruTko4SiACLGEEoXEQsAwA.; fve=1641314748.0; utm_lps=google__cpc; ua=531227642bc86f3b5fd7103a0c0b4fd6; p=eyJnZHByX3RwIjoxLCJnZHByX3AiOjF9; _gcl_au=1.1.1757627174.1641314793; _gid=GA1.2.1898390797.1641314793; __adal_cw=1641314793715; _pin_unauth=dWlkPVltVmtZemxoTldNdFpURXdPQzAwWkRWbUxXRTJOV1l0TTJGaE9URXdZVEEwTlRBeQ; last_browse_page=https%3A%2F%2Fwww.etsy.com%2Fuk%2F; __adal_ses=*; __adal_ca=so%3DGoogle%26me%3Dorganic%26ca%3D%28not%2520set%29%26co%3D%28not%2520set%29%26ke%3D%28not%2520set%29; search_options={"prev_search_term":"20s","item_language":null,"language_carousel":null}; _ga=GA1.2.559839679.1641314793; tsd=%7B%7D; __adal_id=952d43d7-5b80-4907-99d7-6f6baa9f4fe1.1641314794.3.1641383063.1641383059.2fe7a338-93bd-441f-b295-80549adbef7b; _tq_id.TV-27270909-1.a4d5=e2f6af8c27dee5e4.1641314794.0.1641383063..; _uetsid=dff577e06d7d11ec9617cbf4cc51b5b2; _uetvid=dff5f2706d7d11ec932fd3c5b816ab20; granify.uuid=bfd14e46-e8fa-4e7b-bce7-6f05dcb4b215; pla_spr=1; _ga_KR3J610VYM=GS1.1.1641383058.3.1.1641383118.60; exp_hangover=qk2fpkLi1lphuLsCKeq4gAe9BvxjZACCxCuVe8D01Zbb1UrlqUnxiUUlmWmZyZmJOfE5iSWpecmV8YUm8UYGhpZKVkqZeak5memZSTmpSrUMAA..; granify.session.QrsCf=-1',
}
# Form fields passed to requests.post as `data=`.
data = {
'log_performance_metrics': 'true',
'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-GB',
'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'GBP',
'specs[async_search_results][1][search_request_params][detected_locale][region]': 'GB',
'specs[async_search_results][1][search_request_params][locale][language]': 'en-GB',
'specs[async_search_results][1][search_request_params][locale][currency_code]': 'GBP',
'specs[async_search_results][1][search_request_params][locale][region]': 'GB',
'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
'specs[async_search_results][1][search_request_params][parameters][q]': '20s',
'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
'specs[async_search_results][1][search_request_params][parameters][page]': '2',
'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB',
'specs[async_search_results][1][search_request_params][user_id]': '',
'specs[async_search_results][1][request_type]': 'pagination_preact',
'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'false',
'view_data_event_name': 'search_async_pagination_specview_rendered'
}
# POST the form to the endpoint; the return value is not stored.
requests.post('https://www.etsy.com/api/v3/ajax/bespoke/member/neu/specs/async_search_results', headers=headers, data=data)
# Result reported by the author: <Response [200]>
我们需要带上 cookies 才能获取所需的数据,
但不要把它们放在 headers 的 'cookie' 字段中,而是要把它们移到请求的 cookies 参数里:
def start_requests(self):
    """Yield one POST form request for every URL in ``self.start_urls``.

    The session cookies are handed to Scrapy through the ``cookies=``
    argument of ``scrapy.FormRequest`` rather than through a raw
    ``cookie`` entry in ``headers``.

    Yields:
        scrapy.FormRequest: a POST request whose response is routed to
        ``self.parse_res``.
    """
    # NOTE(review): the 'x-csrf-token' below and the cookie values look
    # session-specific — presumably they expire; confirm before reusing.
    headers = {
        'authority': 'www.etsy.com',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
        'x-csrf-token': '3:1641390466:3d9EJ5Y1lwN6z_d3nn2qROS-IK6z:476df27e75d2b310bb79d565bbb3fa66b6c6d1ec26c137e6b98a8265a8447b4c',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'accept': '*/*',
        'x-requested-with': 'XMLHttpRequest',
        'x-page-guid': 'eeda8f50e2a.c5a8a0ae59e2ab4a8635.00',
        'x-detected-locale': 'USD|en-US|UA',
        'sec-ch-ua-platform': '"Windows"',
        'origin': 'https://www.etsy.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=3&ref=pagination',
        'accept-language': 'en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7,uk;q=0.6,en-GB;q=0.5',
        # Deliberately no 'cookie' entry: the cookies go in `cookies` below.
    }
    data = {
        'log_performance_metrics': 'true',
        'specs[async_search_results][]': 'Search2_ApiSpecs_WebSearch',
        'specs[async_search_results][1][search_request_params][detected_locale][language]': 'en-US',
        'specs[async_search_results][1][search_request_params][detected_locale][currency_code]': 'USD',
        'specs[async_search_results][1][search_request_params][detected_locale][region]': 'UA',
        'specs[async_search_results][1][search_request_params][locale][language]': 'en-US',
        'specs[async_search_results][1][search_request_params][locale][currency_code]': 'USD',
        'specs[async_search_results][1][search_request_params][locale][region]': 'UA',
        'specs[async_search_results][1][search_request_params][name_map][query]': 'q',
        'specs[async_search_results][1][search_request_params][name_map][query_type]': 'qt',
        'specs[async_search_results][1][search_request_params][name_map][results_per_page]': 'result_count',
        'specs[async_search_results][1][search_request_params][name_map][min_price]': 'min',
        'specs[async_search_results][1][search_request_params][name_map][max_price]': 'max',
        'specs[async_search_results][1][search_request_params][parameters][q]': '20s',
        'specs[async_search_results][1][search_request_params][parameters][explicit]': '1',
        'specs[async_search_results][1][search_request_params][parameters][ship_to]': 'GB',
        'specs[async_search_results][1][search_request_params][parameters][page]': '3',
        'specs[async_search_results][1][search_request_params][parameters][ref]': 'pagination',
        'specs[async_search_results][1][search_request_params][parameters][facet]': 'clothing/womens-clothing',
        'specs[async_search_results][1][search_request_params][parameters][referrer]': 'https://www.etsy.com/search/clothing/womens-clothing?q=20s&explicit=1&ship_to=GB&page=2&ref=pagination',
        'specs[async_search_results][1][search_request_params][user_id]': '',
        'specs[async_search_results][1][request_type]': 'pagination_preact',
        'specs[async_search_results][1][is_eligible_for_spa_reformulations]': 'true',
        'view_data_event_name': 'search_async_pagination_specview_rendered',
    }
    cookies = {
        "user_prefs": "2sjEL59UUglDjNIW6TKc04MvLTVjZACCxJMbvsPoaKXQYBclnbzSnBwdpdQ83dBgJR2lUEeoiBGEwkXEMgAA",
        "fve": "1640607991.0",
        "ua": "531227642bc86f3b5fd7103a0c0b4fd6",
        "_gcl_au": "1.1.717562651.1640607992",
        "uaid": "E7bYwrWVwTy7YGe_b_ipYT3Avd9jZACCxJMbvoPpqwvzqpVKEzNTlKyUnLJ9Io3DTQt1k53MwiojXTLzvZPCS31yCoPC_JRqGQA.",
        "pla_spr": "0",
        "_gid": "GA1.2.1425785976.1641390447",
        "_dc_gtm_UA-2409779-1": "1",
        "_pin_unauth": "dWlkPU0yVTRaamxoTWpjdFlqTTVZUzAwT0RJeExXRmpNamt0WlROalpXTTVNREE0WkRVNQ",
        "_ga": "GA1.1.1730759327.1640607993",
        "_uetsid": "052ece906e2e11ecb56a0390ed629376",
        "_uetvid": "39de7550671011ec80d2dbfaa05c901b",
        "exp_hangover": "pB4zSokzfzMIT9Jzi7zIwmXybCJjZACCxJMbvoPpqwt7qpXKU5PiE4tKMtMykzMTc-JzEktS85Ir4wtN4o0MDC2VrJQy81JzMtMzk3JSlWoZAA..",
        "_ga_KR3J610VYM": "GS1.1.1641390446.2.1.1641390474.32",
    }
    for url in self.start_urls:
        # Use the loop's URL instead of repeating the endpoint as a literal,
        # so that `start_urls` is the single place where it is configured.
        yield scrapy.FormRequest(
            url,
            headers=headers,
            cookies=cookies,
            method="POST",
            formdata=data,
            # NOTE(review): `parse_res` is not defined in this snippet — it
            # must exist on the spider class; confirm.
            callback=self.parse_res,
        )