Google 自定义搜索 API nexpage 出错

Error in Google Custom Search API nexpage

Google自定义API我写的搜索代码

import time
from googleapiclient.discovery import build

# Google custom search API info
# Placeholder credentials. The __main__ block copies API_KEY into my_api_key
# (passed to build() as developerKey) and API_ID into my_cse_id (passed to
# cse().list() as cx).
API_KEY = 'My Key'
API_ID = 'My ID'


def spider_next_page(service, index, searchterm_in, incl_searchterm_in, sitesearch_in):
    """Fetch follow-up result pages starting at ``index`` and collect their items.

    Every item of every fetched page is appended to the module-level
    ``itemsave`` list. Paging stops when any of the following holds:

    * the response has no ``queries.nextPage`` entry (the last page was
      reached, so there is nothing more to fetch);
    * the next page would start at index 21, in which case the number of
      pages found is printed first;
    * the next page reports a ``count`` of 0.

    Args:
        service: Custom Search service object exposing ``cse().list(...)``.
        index: 1-based start index of the first page to request.
        searchterm_in: Query string, sent as ``q``.
        incl_searchterm_in: Additional terms, sent as ``orTerms``.
        sitesearch_in: Site restriction, sent as ``siteSearch``.

    Returns:
        None. Results are accumulated in the global ``itemsave``.
    """
    while True:
        # Pause before each request (presumably to stay under the API's
        # rate limit -- confirm against your quota).
        time.sleep(1)
        response = service.cse().list(
            q=searchterm_in,
            orTerms=incl_searchterm_in,
            siteSearch=sitesearch_in,
            cx=my_cse_id,
            num=10,
            start=index,
        ).execute()

        # 'items' is omitted from the response when a page has no results.
        itemsave.extend(response.get('items', []))

        # 'nextPage' is omitted on the last page; indexing it directly is
        # what raised KeyError: 'nextPage'.
        next_pages = response.get('queries', {}).get('nextPage')
        if not next_pages:
            return

        next_page_index = next_pages[0]['startIndex']
        next_page_count = next_pages[0]['count']

        if next_page_index == 21:
            pageno = ((next_page_index - 1) / 10)
            print('Found', round(pageno), "Pages")
            return

        if next_page_count == 0:
            return

        # Continue with the next page instead of recursing.
        index = next_page_index


def spider(searchterm_in, incl_searchterm_in, sitesearch_in):
    """Run the first search request and hand off to ``spider_next_page``.

    Builds the Custom Search service with the module-level ``my_api_key``,
    requests the first page for the module-level engine ``my_cse_id``,
    prints the query details, and appends the page's items to the global
    ``itemsave`` list. If the response advertises a next page with a
    non-zero ``count``, ``spider_next_page`` is called to fetch it.

    Args:
        searchterm_in: Query string, sent as ``q``.
        incl_searchterm_in: Additional terms, sent as ``orTerms``.
        sitesearch_in: Site restriction, sent as ``siteSearch``.

    Returns:
        None. Results are accumulated in the global ``itemsave``.
    """
    service = build("customsearch", "v1",
                    developerKey=my_api_key)
    res = service.cse().list(
        q=searchterm_in,
        orTerms=incl_searchterm_in,
        siteSearch=sitesearch_in,
        cx=my_cse_id,
    ).execute()

    queries = res['queries']
    # Use the search terms echoed back by the API for the follow-up pages.
    searchterm_in = queries['request'][0]['searchTerms']

    # 'nextPage' is omitted when the first page is also the last one;
    # indexing it directly is what raised KeyError: 'nextPage'.
    next_pages = queries.get('nextPage')
    if next_pages:
        next_page_index = next_pages[0]['startIndex']
        next_page_count = next_pages[0]['count']
    else:
        next_page_index = None
        next_page_count = 0

    print('nextPageIndex', next_page_index)
    print("Search Query:", searchterm_in)
    print("Include Query :", incl_searchterm_in)
    print("Target Site :", sitesearch_in)

    # 'items' is omitted from the response when there are no results.
    itemsave.extend(res.get('items', []))

    if next_page_count != 0:
        spider_next_page(service, next_page_index, searchterm_in, incl_searchterm_in, sitesearch_in)


if __name__ == '__main__':
    # These names are bound at module level on purpose: spider() and
    # spider_next_page() read my_api_key / my_cse_id and append to itemsave
    # as globals, so they must exist before spider() is called.
    my_api_key = API_KEY
    my_cse_id = API_ID
    itemsave = []
    # Arguments are (q, orTerms, siteSearch).
    spider('men shorts', 'adidas', 'yahoo.com')

正常情况下可以无误地获取数据,但有时会出现如下错误。

Traceback (most recent call last):
  File "D://Google_Search_API_0.4.py", line 122, in <module>
    spider(f'{searchterm}', f'{incl_searchterm}', f'{sitesearch}')
  File "D://Google_Search_API_0.4.py", line 70, in spider
    nextPageIndex = res['queries']['nextPage'][0]['startIndex']
KeyError: 'nextPage'

它按以下顺序工作。

spider('men shorts', 'adidas', 'yahoo.com') # input query (q,orTerms,siteSearch)

spider(searchterm_in, incl_searchterm_in, sitesearch_in): # first page -> get input query

spider_next_page(service, index, searchterm_in, incl_searchterm_in, sitesearch_in): # next page

如果出现错误,就出现在这里。

def spider(searchterm_in, incl_searchterm_in, sitesearch_in):
    ...
   --> nextPageIndex = res['queries']['nextPage'][0]['startIndex']

如果我是对的,问题是什么?为什么它有时可以正常运行?

可能是数据结束所以没有了nextPage

您可以使用 try/except 来捕获错误并跳过代码。

或者你应该 运行 它在 if 'nextPage' in res['queries']:

if 'nextPage' not in res['queries']:`
    print("The End")

else:
    nextPageIndex = response['queries']['nextPage'][0]['startIndex']
    nextPageCount = response['queries']['nextPage'][0]['count']

    for values_spider_next_page in response['items']:
        itemsave.append(values_spider_next_page)

    if nextPageIndex == 21:
        pageno = ((nextPageIndex - 1) / 10)
        print('Found', round(pageno), "Pages")
        return

    if nextPageCount != 0:
        spider_next_page(service, nextPageIndex, searchterm_in, incl_searchterm_in, sitesearch_in)