使用aiohttp获取多个网站的状态
Using aiohttp to get the status of a number of websites
我有这段代码用于获取网站列表的状态。
import aiohttp
import asyncio
import json
import sys
import time
async def get_statuses(websites):
    """Fetch the status of every URL in *websites* and print a tally.

    One ``get_website_status`` coroutine is created per entry and all of
    them are awaited together through ``asyncio.gather``. The tally maps
    each returned status to the number of times it occurred and is printed
    as a JSON object.
    """
    statuses = {}
    tasks = [get_website_status(website) for website in websites]
    for status in await asyncio.gather(*tasks):
        # First occurrence of this status: start its counter at zero.
        if not statuses.get(status):
            statuses[status] = 0
        statuses[status] += 1
    print(json.dumps(statuses))
async def get_website_status(url):
    """Request *url* and return the ``status`` attribute of the response."""
    # NOTE(review): the module-level aiohttp.get() call is reported as
    # deprecated; confirm against the installed aiohttp version.
    response = await aiohttp.get(url)
    status = response.status
    # Close the response explicitly once the status has been read.
    response.close()
    return status
if __name__ == '__main__':
    # The first command-line argument is the path of a text file; each of
    # its lines is treated as one website URL.
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    # Measure the wall-clock time of the whole run.
    t0 = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(get_statuses(websites))
    t1 = time.time()
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))
由于 await aiohttp.get(url) 这种写法中的 get 已被弃用(deprecated),
我把代码改成了下面这样:
import aiohttp
import asyncio
import json
import sys
import time
async def fetch(session, url):
    """GET *url* through *session* and return the response body as text.

    The value handed back is the result of ``response.text()``, not the
    response object itself.
    """
    async with session.get(url) as response:
        return await response.text()
async def get_statuses(websites):
    """Fetch the status of every URL in *websites* and print a tally.

    One ``get_website_status`` coroutine is created per entry and all of
    them are awaited together through ``asyncio.gather``. The tally maps
    each returned status to the number of times it occurred and is printed
    as a JSON object.
    """
    statuses = {}
    tasks = [get_website_status(website) for website in websites]
    for status in await asyncio.gather(*tasks):
        # First occurrence of this status: start its counter at zero.
        if not statuses.get(status):
            statuses[status] = 0
        statuses[status] += 1
    print(json.dumps(statuses))
async def get_website_status(url):
    """Open a client session, fetch *url*, and try to return its status.

    NOTE(review): as written this raises ``AttributeError`` because
    ``fetch`` returns the body text (a ``str``) rather than a response
    object, so there is no ``status`` attribute to read.
    """
    async with aiohttp.ClientSession() as session:
        response = await fetch(session, url)
        #response = await aiohttp.get(url)
        # This is the line the reported traceback points at: `response`
        # is a str here.
        status = response.status
        response.close()
        return status
if __name__ == '__main__':
    # The first command-line argument is the path of a text file; each of
    # its lines is treated as one website URL.
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    # Measure the wall-clock time of the whole run.
    t0 = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(get_statuses(websites))
    t1 = time.time()
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))
我从文档中复制了会话(session)相关的代码:https://aiohttp.readthedocs.io/en/stable/
然而,当我运行代码时,出现了以下错误:
c:\asyncio>a.py list.txt
Traceback (most recent call last):
File "C:\asyncio\a.py", line 35, in <module>
loop.run_until_complete(get_statuses(websites))
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\asyncio\base_ev
ents.py", line 579, in run_until_complete
return future.result()
File "C:\asyncio\a.py", line 14, in get_statuses
for status in await asyncio.gather(*tasks):
File "C:\asyncio\a.py", line 25, in get_website_status
status = response.status
AttributeError: 'str' object has no attribute 'status'
c:\asyncio>
这是一个示例list.txt
https://facebook.com/
https://twitter.com/
https://google.com/
https://youtube.com/
https://linkedin.com/
https://instagram.com/
https://pinterest.com/
get_website_status 例程把请求委托给 fetch 函数,而 fetch 返回的是 response.text() 的文本内容(一个字符串),而不是 response 对象本身。
这就是为什么访问 response.status 会抛出一个明显的错误(AttributeError)。
如果不需要响应内容,要修复这个错误,可将 fetch 函数改为返回 response 对象:
async def fetch(session, url):
    """GET *url* through *session* and return the response object itself."""
    # Returning the response rather than its text lets the caller read
    # attributes such as response.status.
    response = await session.get(url)
    return response
我有这段代码用于获取网站列表的状态。
import aiohttp
import asyncio
import json
import sys
import time
async def get_statuses(websites):
    """Fetch the status of every URL in *websites* and print a tally.

    One ``get_website_status`` coroutine is created per entry and all of
    them are awaited together through ``asyncio.gather``. The tally maps
    each returned status to the number of times it occurred and is printed
    as a JSON object.
    """
    statuses = {}
    tasks = [get_website_status(website) for website in websites]
    for status in await asyncio.gather(*tasks):
        # First occurrence of this status: start its counter at zero.
        if not statuses.get(status):
            statuses[status] = 0
        statuses[status] += 1
    print(json.dumps(statuses))
async def get_website_status(url):
    """Request *url* and return the ``status`` attribute of the response."""
    # NOTE(review): the module-level aiohttp.get() call is reported as
    # deprecated; confirm against the installed aiohttp version.
    response = await aiohttp.get(url)
    status = response.status
    # Close the response explicitly once the status has been read.
    response.close()
    return status
if __name__ == '__main__':
    # The first command-line argument is the path of a text file; each of
    # its lines is treated as one website URL.
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    # Measure the wall-clock time of the whole run.
    t0 = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(get_statuses(websites))
    t1 = time.time()
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))
由于 await aiohttp.get(url) 这种写法中的 get 已被弃用(deprecated),
我把代码改成了下面这样:
import aiohttp
import asyncio
import json
import sys
import time
async def fetch(session, url):
    """GET *url* through *session* and return the response body as text.

    The value handed back is the result of ``response.text()``, not the
    response object itself.
    """
    async with session.get(url) as response:
        return await response.text()
async def get_statuses(websites):
    """Fetch the status of every URL in *websites* and print a tally.

    One ``get_website_status`` coroutine is created per entry and all of
    them are awaited together through ``asyncio.gather``. The tally maps
    each returned status to the number of times it occurred and is printed
    as a JSON object.
    """
    statuses = {}
    tasks = [get_website_status(website) for website in websites]
    for status in await asyncio.gather(*tasks):
        # First occurrence of this status: start its counter at zero.
        if not statuses.get(status):
            statuses[status] = 0
        statuses[status] += 1
    print(json.dumps(statuses))
async def get_website_status(url):
    """Open a client session, fetch *url*, and try to return its status.

    NOTE(review): as written this raises ``AttributeError`` because
    ``fetch`` returns the body text (a ``str``) rather than a response
    object, so there is no ``status`` attribute to read.
    """
    async with aiohttp.ClientSession() as session:
        response = await fetch(session, url)
        #response = await aiohttp.get(url)
        # This is the line the reported traceback points at: `response`
        # is a str here.
        status = response.status
        response.close()
        return status
if __name__ == '__main__':
    # The first command-line argument is the path of a text file; each of
    # its lines is treated as one website URL.
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    # Measure the wall-clock time of the whole run.
    t0 = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(get_statuses(websites))
    t1 = time.time()
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))
我从文档中复制了会话(session)相关的代码:https://aiohttp.readthedocs.io/en/stable/
然而,当我运行代码时,出现了以下错误:
c:\asyncio>a.py list.txt
Traceback (most recent call last):
File "C:\asyncio\a.py", line 35, in <module>
loop.run_until_complete(get_statuses(websites))
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\asyncio\base_ev
ents.py", line 579, in run_until_complete
return future.result()
File "C:\asyncio\a.py", line 14, in get_statuses
for status in await asyncio.gather(*tasks):
File "C:\asyncio\a.py", line 25, in get_website_status
status = response.status
AttributeError: 'str' object has no attribute 'status'
c:\asyncio>
这是一个示例list.txt
https://facebook.com/
https://twitter.com/
https://google.com/
https://youtube.com/
https://linkedin.com/
https://instagram.com/
https://pinterest.com/
get_website_status 例程把请求委托给 fetch 函数,而 fetch 返回的是 response.text() 的文本内容(一个字符串),而不是 response 对象本身。
这就是为什么访问 response.status 会抛出一个明显的错误(AttributeError)。
如果不需要响应内容,要修复这个错误,可将 fetch 函数改为返回 response 对象:
async def fetch(session, url):
    """GET *url* through *session* and return the response object itself."""
    # Returning the response rather than its text lets the caller read
    # attributes such as response.status.
    response = await session.get(url)
    return response