使用 Flask 并行运行 URL 请求
Running URL Requests in Parallel with Flask
asyncio 对我来说还是比较新的。
我从基础开始 - 简单的 HTTP hello world - 仅发出大约 40 个并行 GET 请求并使用 Flask 获取 HTTP 响应的前 400 个字符("parallel" 函数由请求调用)。
运行环境是 Python 3.7。
Traceback 显示了一个我看不懂的错误。"Constructor parameter should be str" 指的是哪个参数?我该如何解决?
这是应用程序的完整代码:
import aiohttp
import asyncio
import json
async def fetch(session, url):
    """Issue a GET for ``url`` on ``session`` and return the body as text."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body
async def main():
    """Download every URL in the module-level ``urls`` list concurrently.

    Prints the first 400 characters of each response body and returns all
    bodies concatenated into one string, each body followed by a comma.

    Each entry of ``urls`` is handed to ``session.get()`` unchanged; a
    non-string entry (e.g. a tuple or list) makes aiohttp raise
    ``TypeError: Constructor parameter should be str``.
    """
    global urls
    async with aiohttp.ClientSession() as session:
        # gather() returns the results in the same order as the awaitables
        # passed in, so htmls lines up with urls.
        htmls = await asyncio.gather(*(fetch(session, url) for url in urls))
    for html in htmls:
        print(html[:400])
    # join() instead of repeated ``+=``; the trailing comma after every
    # body (including the last) is kept on purpose.
    return "".join(html + "," for html in htmls)
def parallel(request):
    """HTTP entry point: fetch the URLs listed in the request's JSON body.

    Reads the ``urls`` key from the JSON payload (if present) into the
    module-level ``urls`` list that ``main()`` consumes, then runs
    ``main()`` to completion and returns its result.

    Args:
        request: object exposing ``get_json()`` (the traceback shows it is
            ``flask.request``).

    Returns:
        The string produced by ``main()``.
    """
    global urls
    urls = []
    request_json = request.get_json()
    if request_json and 'urls' in request_json:
        urls = request_json['urls']
        print(urls)
    # asyncio.run() (Python 3.7+) creates a fresh event loop and closes it
    # afterwards; asyncio.get_event_loop() depends on an ambient loop and
    # raises RuntimeError in threads that do not have one.
    return asyncio.run(main())
Traceback 显示错误:
Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 346, in run_http_function
result = _function_handler.invoke_user_function(flask.request)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 217, in invoke_user_function
return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 210, in call_user_function
return self._user_function(request_or_event)
File "/user_code/main.py", line 57, in parallel
return loop.run_until_complete(main())
File "/opt/python3.7/lib/python3.7/asyncio/base_events.py", line 573, in run_until_complete
return future.result()
File "/user_code/main.py", line 15, in main
htmls = await asyncio.gather(*tasks)
File "/user_code/main.py", line 6, in fetch
async with session.get(url) as response:
File "/env/local/lib/python3.7/site-packages/aiohttp/client.py", line 1012, in __aenter__
self._resp = await self._coro
File "/env/local/lib/python3.7/site-packages/aiohttp/client.py", line 380, in _request
url = URL(str_or_url)
File "/env/local/lib/python3.7/site-packages/yarl/__init__.py", line 149, in __new__
raise TypeError("Constructor parameter should be str")
TypeError: Constructor parameter should be str
我测试过:如果在
session.get( (url, something) )
中传入的不是字符串(例如 tuple/list),就会得到和你一样的错误。所以你的 urls 中的数据有问题。
我用来测试的代码:
import aiohttp
import asyncio
async def fetch(session, url):
    """Issue a GET for ``url`` on ``session`` and return the body as text."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body
async def main(urls):
    """Fetch all ``urls`` concurrently and return the list of response bodies.

    One shared ``aiohttp.ClientSession`` is used for every request; the
    returned list follows the order of ``urls``.
    """
    async with aiohttp.ClientSession() as session:
        pending = [fetch(session, address) for address in urls]
        return await asyncio.gather(*pending)
def parallel(urls):
    """Synchronously run ``main(urls)`` and return its list of results.

    Args:
        urls: iterable of URL strings to download.

    Returns:
        The list returned by ``main(urls)``.
    """
    # asyncio.run() (Python 3.7+) creates and closes its own event loop, so
    # this does not depend on the calling thread already having one, unlike
    # asyncio.get_event_loop().
    return asyncio.run(main(urls))
# --- main ---

# Every entry must be a plain string. An entry such as
# ('https://whosebug.com/', 1) reproduces
# "TypeError: Constructor parameter should be str".
urls = [
    'https://whosebug.com/',
    'https://httpbin.org/',
    'http://toscrape.com/',
]

result = parallel(urls)

# Show the first 300 characters of each page, followed by a separator line.
for item in result:
    print(item[:300], '-----', sep='\n')
我不知道你从 request_json['urls'] 中得到的是什么,
但你应该从中只取出 url 字符串:
urls = request_json['urls']
urls = [ ??? for x in urls] # in place `???` use code which get only url from `x`
asyncio 对我来说还是比较新的。
我从基础开始 - 简单的 HTTP hello world - 仅发出大约 40 个并行 GET 请求并使用 Flask 获取 HTTP 响应的前 400 个字符("parallel" 函数由请求调用)。
运行环境是 Python 3.7。
Traceback 显示了一个我看不懂的错误。"Constructor parameter should be str" 指的是哪个参数?我该如何解决?
这是应用程序的完整代码:
import aiohttp
import asyncio
import json
async def fetch(session, url):
    """Issue a GET for ``url`` on ``session`` and return the body as text."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body
async def main():
    """Download every URL in the module-level ``urls`` list concurrently.

    Prints the first 400 characters of each response body and returns all
    bodies concatenated into one string, each body followed by a comma.

    Each entry of ``urls`` is handed to ``session.get()`` unchanged; a
    non-string entry (e.g. a tuple or list) makes aiohttp raise
    ``TypeError: Constructor parameter should be str``.
    """
    global urls
    async with aiohttp.ClientSession() as session:
        # gather() returns the results in the same order as the awaitables
        # passed in, so htmls lines up with urls.
        htmls = await asyncio.gather(*(fetch(session, url) for url in urls))
    for html in htmls:
        print(html[:400])
    # join() instead of repeated ``+=``; the trailing comma after every
    # body (including the last) is kept on purpose.
    return "".join(html + "," for html in htmls)
def parallel(request):
    """HTTP entry point: fetch the URLs listed in the request's JSON body.

    Reads the ``urls`` key from the JSON payload (if present) into the
    module-level ``urls`` list that ``main()`` consumes, then runs
    ``main()`` to completion and returns its result.

    Args:
        request: object exposing ``get_json()`` (the traceback shows it is
            ``flask.request``).

    Returns:
        The string produced by ``main()``.
    """
    global urls
    urls = []
    request_json = request.get_json()
    if request_json and 'urls' in request_json:
        urls = request_json['urls']
        print(urls)
    # asyncio.run() (Python 3.7+) creates a fresh event loop and closes it
    # afterwards; asyncio.get_event_loop() depends on an ambient loop and
    # raises RuntimeError in threads that do not have one.
    return asyncio.run(main())
Traceback 显示错误:
Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 346, in run_http_function
result = _function_handler.invoke_user_function(flask.request)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 217, in invoke_user_function
return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 210, in call_user_function
return self._user_function(request_or_event)
File "/user_code/main.py", line 57, in parallel
return loop.run_until_complete(main())
File "/opt/python3.7/lib/python3.7/asyncio/base_events.py", line 573, in run_until_complete
return future.result()
File "/user_code/main.py", line 15, in main
htmls = await asyncio.gather(*tasks)
File "/user_code/main.py", line 6, in fetch
async with session.get(url) as response:
File "/env/local/lib/python3.7/site-packages/aiohttp/client.py", line 1012, in __aenter__
self._resp = await self._coro
File "/env/local/lib/python3.7/site-packages/aiohttp/client.py", line 380, in _request
url = URL(str_or_url)
File "/env/local/lib/python3.7/site-packages/yarl/__init__.py", line 149, in __new__
raise TypeError("Constructor parameter should be str")
TypeError: Constructor parameter should be str
我测试过:如果在
session.get( (url, something) )
中传入的不是字符串(例如 tuple/list),就会得到和你一样的错误。所以你的 urls 中的数据有问题。
我用来测试的代码:
import aiohttp
import asyncio
async def fetch(session, url):
    """Issue a GET for ``url`` on ``session`` and return the body as text."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body
async def main(urls):
    """Fetch all ``urls`` concurrently and return the list of response bodies.

    One shared ``aiohttp.ClientSession`` is used for every request; the
    returned list follows the order of ``urls``.
    """
    async with aiohttp.ClientSession() as session:
        pending = [fetch(session, address) for address in urls]
        return await asyncio.gather(*pending)
def parallel(urls):
    """Synchronously run ``main(urls)`` and return its list of results.

    Args:
        urls: iterable of URL strings to download.

    Returns:
        The list returned by ``main(urls)``.
    """
    # asyncio.run() (Python 3.7+) creates and closes its own event loop, so
    # this does not depend on the calling thread already having one, unlike
    # asyncio.get_event_loop().
    return asyncio.run(main(urls))
# --- main ---

# Every entry must be a plain string. An entry such as
# ('https://whosebug.com/', 1) reproduces
# "TypeError: Constructor parameter should be str".
urls = [
    'https://whosebug.com/',
    'https://httpbin.org/',
    'http://toscrape.com/',
]

result = parallel(urls)

# Show the first 300 characters of each page, followed by a separator line.
for item in result:
    print(item[:300], '-----', sep='\n')
我不知道你从 request_json['urls'] 中得到的是什么,
但你应该从中只取出 url 字符串:
urls = request_json['urls']
urls = [ ??? for x in urls] # in place `???` use code which get only url from `x`