使用aiohttp获取多个网站的状态

Question

我有这段代码用于获取网站列表的状态。

import aiohttp
import asyncio
import json
import sys
import time


async def get_statuses(websites):
    """Collect the status of every site concurrently and print a tally.

    Args:
        websites: Iterable of URL strings; each one is handed to
            ``get_website_status``.

    Prints a JSON object that maps each returned status to the number of
    sites that produced it. Returns ``None``.
    """
    pending = [get_website_status(site) for site in websites]
    results = await asyncio.gather(*pending)
    counts = {}
    for code in results:
        # Unseen statuses start from zero before this occurrence is counted.
        counts[code] = counts.get(code, 0) + 1
    print(json.dumps(counts))


async def get_website_status(url):
    """Request *url* and return the ``status`` attribute of the response.

    The request goes through the module-level ``aiohttp.get`` coroutine, and
    the response is closed before the status is returned.
    """
    resp = await aiohttp.get(url)
    code = resp.status
    resp.close()
    return code


if __name__ == '__main__':
    # The first command-line argument names a text file with one URL per line.
    with open(sys.argv[1], 'r') as url_file:
        websites = url_file.read().splitlines()
    started = time.time()
    # Run the coroutine to completion on the current event loop.
    asyncio.get_event_loop().run_until_complete(get_statuses(websites))
    elapsed = time.time() - started
    print("getting website statuses took {0:.1f} seconds".format(elapsed))

由于 await aiohttp.get(url) 中的 get 已被弃用（deprecated），我把代码改成了下面这样：

import aiohttp
import asyncio
import json
import sys
import time

async def fetch(session, url):
    """Issue a GET for *url* on *session* and return the response body text.

    The response is used as an async context manager, so it is exited before
    this coroutine returns. Only the result of ``response.text()`` is handed
    back, not the response object.
    """
    async with session.get(url) as resp:
        body = await resp.text()
    return body

async def get_statuses(websites):
    """Gather the status of every site in *websites* and print the counts as JSON."""
    statuses = {}  # status value -> number of sites that returned it
    # One coroutine per URL; asyncio.gather awaits them all and returns their results.
    tasks = [get_website_status(website) for website in websites]
    for status in await asyncio.gather(*tasks):
        # First occurrence of a status: start its counter at zero.
        if not statuses.get(status):
            statuses[status] = 0
        statuses[status] += 1
    print(json.dumps(statuses))


async def get_website_status(url):
 """Open a new ``aiohttp.ClientSession`` and return the status for *url*.

 NOTE(review): ``fetch`` as defined in this snippet returns
 ``await response.text()`` (a ``str``), not the response object, so the
 ``response.status`` access below raises
 ``AttributeError: 'str' object has no attribute 'status'``.
 """
 async with aiohttp.ClientSession() as session:
    response = await fetch(session, url)
    # Earlier version called the module-level helper: response = await aiohttp.get(url)
    status = response.status
    response.close()
    return status


if __name__ == '__main__':
    # The first command-line argument names a text file with one URL per line.
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    t0 = time.time()  # wall-clock start of the whole run
    loop = asyncio.get_event_loop()
    # Drive get_statuses to completion on the event loop.
    loop.run_until_complete(get_statuses(websites))
    t1 = time.time()
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))

会话相关的代码是我从官方文档中复制的：https://aiohttp.readthedocs.io/en/stable/

然而，当我运行代码时，出现了如下错误：

c:\asyncio>a.py list.txt
Traceback (most recent call last):
  File "C:\asyncio\a.py", line 35, in <module>
    loop.run_until_complete(get_statuses(websites))
  File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\asyncio\base_ev
ents.py", line 579, in run_until_complete
    return future.result()
  File "C:\asyncio\a.py", line 14, in get_statuses
    for status in await asyncio.gather(*tasks):
  File "C:\asyncio\a.py", line 25, in get_website_status
    status = response.status
AttributeError: 'str' object has no attribute 'status'

c:\asyncio>

这是一个示例list.txt

https://facebook.com/
https://twitter.com/
https://google.com/
https://youtube.com/
https://linkedin.com/
https://instagram.com/
https://pinterest.com/

Answer 1

get_website_status 协程把请求委托给 fetch 函数，而 fetch 返回的是文本内容 response.text()，而不是 response 对象本身。
字符串没有 status 属性，因此访问 response.status 时必然会抛出上面的 AttributeError。

如果不需要响应内容，只需将 fetch 函数改为返回 response 对象本身，即可修复该错误：

async def fetch(session, url):
    """Send a GET request for *url* on *session* and return the response object.

    The response is handed back as-is; it is neither read nor closed here.
    """
    return await session.get(url)

使用aiohttp获取多个网站的状态

Using aiohttp to get the status of a number of websites

python

python-asyncio

aiohttp