Google 自定义搜索 API nexpage 出错
Error in Google Custom Search API nexpage
Google自定义API我写的搜索代码
import time
from googleapiclient.discovery import build
# Google custom search API info
API_KEY = 'My Key'
API_ID = 'My ID'
def spider_next_page(service, index, searchterm_in, incl_searchterm_in, sitesearch_in):
time.sleep(1)
response = service.cse().list(
q=searchterm_in,
orTerms=incl_searchterm_in,
siteSearch=sitesearch_in,
cx=my_cse_id,
num=10,
start=index,
).execute()
nextPageIndex = response['queries']['nextPage'][0]['startIndex']
nextPageCount = response['queries']['nextPage'][0]['count']
for values_spider_next_page in response['items']:
itemsave.append(values_spider_next_page)
if nextPageIndex == 21:
pageno = ((nextPageIndex - 1) / 10)
print('Found', round(pageno), "Pages")
return
if nextPageCount != 0:
spider_next_page(service, nextPageIndex, searchterm_in, incl_searchterm_in, sitesearch_in)
def spider(searchterm_in, incl_searchterm_in, sitesearch_in):
service = build("customsearch", "v1",
developerKey=my_api_key)
res = service.cse().list(
q=searchterm_in,
orTerms=incl_searchterm_in,
siteSearch=sitesearch_in,
cx=my_cse_id,
).execute()
searchterm_in = res['queries']['request'][0]['searchTerms']
nextPageIndex = res['queries']['nextPage'][0]['startIndex']
nextPageCount = res['queries']['nextPage'][0]['count']
print('nextPageIndex', nextPageIndex)
print("Search Query:", searchterm_in)
print("Include Query :", incl_searchterm_in)
print("Target Site :", sitesearch_in)
for values_spider in res['items']:
itemsave.append(values_spider)
if nextPageCount != 0:
spider_next_page(service, nextPageIndex, searchterm_in, incl_searchterm_in, sitesearch_in)
return
if __name__ == '__main__':
my_api_key = API_KEY
my_cse_id = API_ID
itemsave = []
spider('men shorts', 'adidas', 'yahoo.com')
正常情况下可以无误地获取数据,但有时会出现如下错误。
Traceback (most recent call last):
File "D://Google_Search_API_0.4.py", line 122, in <module>
spider(f'{searchterm}', f'{incl_searchterm}', f'{sitesearch}')
File "D://Google_Search_API_0.4.py", line 70, in spider
nextPageIndex = res['queries']['nextPage'][0]['startIndex']
KeyError: 'nextPage'
它按以下顺序工作。
spider('men shorts', 'adidas', 'yahoo.com') # input query (q,orTerms,siteSearch)
spider(searchterm_in, incl_searchterm_in, sitesearch_in): # first page -> get input query
spider_next_page(service, index, searchterm_in, incl_searchterm_in, sitesearch_in): # next page
如果出现错误,就出现在这里。
def spider(searchterm_in, incl_searchterm_in, sitesearch_in):
...
--> nextPageIndex = res['queries']['nextPage'][0]['startIndex']
如果我是对的,问题是什么?为什么它有时可以正常运行?
可能是数据结束所以没有了nextPage
。
您可以使用 try/except
来捕获错误并跳过代码。
或者你应该 运行 它在 if 'nextPage' in res['queries']:
if 'nextPage' not in res['queries']:`
print("The End")
else:
nextPageIndex = response['queries']['nextPage'][0]['startIndex']
nextPageCount = response['queries']['nextPage'][0]['count']
for values_spider_next_page in response['items']:
itemsave.append(values_spider_next_page)
if nextPageIndex == 21:
pageno = ((nextPageIndex - 1) / 10)
print('Found', round(pageno), "Pages")
return
if nextPageCount != 0:
spider_next_page(service, nextPageIndex, searchterm_in, incl_searchterm_in, sitesearch_in)
Google自定义API我写的搜索代码
import time
from googleapiclient.discovery import build
# Google custom search API info
API_KEY = 'My Key'
API_ID = 'My ID'
def spider_next_page(service, index, searchterm_in, incl_searchterm_in, sitesearch_in):
time.sleep(1)
response = service.cse().list(
q=searchterm_in,
orTerms=incl_searchterm_in,
siteSearch=sitesearch_in,
cx=my_cse_id,
num=10,
start=index,
).execute()
nextPageIndex = response['queries']['nextPage'][0]['startIndex']
nextPageCount = response['queries']['nextPage'][0]['count']
for values_spider_next_page in response['items']:
itemsave.append(values_spider_next_page)
if nextPageIndex == 21:
pageno = ((nextPageIndex - 1) / 10)
print('Found', round(pageno), "Pages")
return
if nextPageCount != 0:
spider_next_page(service, nextPageIndex, searchterm_in, incl_searchterm_in, sitesearch_in)
def spider(searchterm_in, incl_searchterm_in, sitesearch_in):
service = build("customsearch", "v1",
developerKey=my_api_key)
res = service.cse().list(
q=searchterm_in,
orTerms=incl_searchterm_in,
siteSearch=sitesearch_in,
cx=my_cse_id,
).execute()
searchterm_in = res['queries']['request'][0]['searchTerms']
nextPageIndex = res['queries']['nextPage'][0]['startIndex']
nextPageCount = res['queries']['nextPage'][0]['count']
print('nextPageIndex', nextPageIndex)
print("Search Query:", searchterm_in)
print("Include Query :", incl_searchterm_in)
print("Target Site :", sitesearch_in)
for values_spider in res['items']:
itemsave.append(values_spider)
if nextPageCount != 0:
spider_next_page(service, nextPageIndex, searchterm_in, incl_searchterm_in, sitesearch_in)
return
if __name__ == '__main__':
my_api_key = API_KEY
my_cse_id = API_ID
itemsave = []
spider('men shorts', 'adidas', 'yahoo.com')
正常情况下可以无误地获取数据,但有时会出现如下错误。
Traceback (most recent call last):
File "D://Google_Search_API_0.4.py", line 122, in <module>
spider(f'{searchterm}', f'{incl_searchterm}', f'{sitesearch}')
File "D://Google_Search_API_0.4.py", line 70, in spider
nextPageIndex = res['queries']['nextPage'][0]['startIndex']
KeyError: 'nextPage'
它按以下顺序工作。
spider('men shorts', 'adidas', 'yahoo.com') # input query (q,orTerms,siteSearch)
spider(searchterm_in, incl_searchterm_in, sitesearch_in): # first page -> get input query
spider_next_page(service, index, searchterm_in, incl_searchterm_in, sitesearch_in): # next page
如果出现错误,就出现在这里。
def spider(searchterm_in, incl_searchterm_in, sitesearch_in):
...
--> nextPageIndex = res['queries']['nextPage'][0]['startIndex']
如果我是对的,问题是什么?为什么它有时可以正常运行?
可能是数据结束所以没有了nextPage
。
您可以使用 try/except
来捕获错误并跳过代码。
或者你应该 运行 它在 if 'nextPage' in res['queries']:
if 'nextPage' not in res['queries']:`
print("The End")
else:
nextPageIndex = response['queries']['nextPage'][0]['startIndex']
nextPageCount = response['queries']['nextPage'][0]['count']
for values_spider_next_page in response['items']:
itemsave.append(values_spider_next_page)
if nextPageIndex == 21:
pageno = ((nextPageIndex - 1) / 10)
print('Found', round(pageno), "Pages")
return
if nextPageCount != 0:
spider_next_page(service, nextPageIndex, searchterm_in, incl_searchterm_in, sitesearch_in)