Python Requests 模块 - 一次访问多个 URL
Python Requests module - Access multiple URL's at once
我需要使用大约 500 多个 URL 来验证服务及其依赖关系,并且我已经有一个 python 代码可以执行此操作。
但问题是,其中一些 URL 每个都需要大约一分钟才能响应 [由于某些已知的依赖项已关闭]。
由于每个 URL 都托管在不同的服务器上,有没有办法使用 requests 模块同时访问多个 URL?
下面是我在 pycharm 中使用的全部代码:
import requests
import json
import pandas
import datetime
from requests.auth import HTTPBasicAuth
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
def validate():
    """Validate every service URL listed in the spreadsheet, one after another.

    Reads the 'Sr No', 'Service Name', 'Machine Name' and 'Node' columns of
    'Sheet1', requests each 'Node' URL with HTTP basic auth (certificate
    verification disabled) and decodes the body as JSON.  The JSON is expected
    to carry a 'Success' flag and a 'Dependencies' list whose items have
    'DependencyName' and 'Success' keys.

    A URL whose 'Success' flag is falsy is requested a second time; if it is
    still falsy, every failing dependency that is not on the applicable
    ignore list is printed.  Any exception raised while checking a URL is
    caught and reported as "not accessible" so the remaining rows still run.
    """
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)
    username = 'username'
    password = 'password'
    # Loop-invariant configuration, built once instead of once per row.
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')

    def fetch_status(url):
        """Request *url* and return its decoded JSON body."""
        response = requests.get(url, verify=False, auth=HTTPBasicAuth(username, password))
        return json.loads(response.text)

    def get(url, service, machine):
        """Check one URL and print each failing dependency that is not ignored."""
        data = fetch_status(url)
        if not data['Success']:
            # Ask a second time before reporting anything.
            data = fetch_status(url)
        if data['Success']:
            return
        # Services in alwaysdownservice use the paydown ignore list,
        # every other service uses otherdown.
        ignored = paydown if service in alwaysdownservice else otherdown
        for j in data['Dependencies']:
            dependency = j['DependencyName']
            if not j['Success'] and dependency not in ignored:
                print(dependency, "down on", machine, "for", service)

    for i in mydata.index:
        srno = str(mydata['Sr No'][i])
        service = mydata['Service Name'][i]
        machine = mydata['Machine Name'][i]
        url = mydata['Node'][i]
        try:
            get(url, service, machine)
        except Exception as e:
            # Best-effort run: report the failure and continue with the next row.
            print(line, "\n", e, "\n", srno, "| Below URL is not accessible: \n", url, "\n" + line)


validate()
您可以使用线程(Python 中的 threading 库)同时调用多个 URL。为此,您可以使用以下代码:
import requests
import json
import pandas
import datetime
from requests.auth import HTTPBasicAuth
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import threading
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Every worker thread started by validate(); joined at the bottom of the script.
threads = []


def validate():
    """Validate every service URL listed in the spreadsheet, one thread per row.

    Reads the 'Sr No', 'Service Name', 'Machine Name' and 'Node' columns of
    'Sheet1' and starts a thread for each row.  Each thread requests the
    'Node' URL with HTTP basic auth (certificate verification disabled),
    decodes the body as JSON and, if the 'Success' flag is falsy, requests
    the URL once more.  If it is still falsy, every failing entry of
    'Dependencies' that is not on the applicable ignore list is printed.

    Started threads are appended to the module-level ``threads`` list; this
    function does not wait for them.
    """
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)
    username = 'username'
    password = 'password'
    # Loop-invariant configuration, built once instead of once per row.
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')

    def fetch_status(url):
        """Request *url* and return its decoded JSON body."""
        response = requests.get(url, verify=False, auth=HTTPBasicAuth(username, password))
        return json.loads(response.text)

    def get(srno, service, machine, url):
        """Thread body: check one URL and report its failing dependencies."""
        # The try/except lives inside the worker because an exception raised
        # in a thread never reaches the code that started the thread.
        try:
            data = fetch_status(url)
            if not data['Success']:
                # Ask a second time before reporting anything.
                data = fetch_status(url)
            if data['Success']:
                return
            ignored = paydown if service in alwaysdownservice else otherdown
            for j in data['Dependencies']:
                dependency = j['DependencyName']
                if not j['Success'] and dependency not in ignored:
                    print(dependency, "down on", machine, "for", service)
        except Exception as e:
            print(line, "\n", e, "\n", srno, "| Below URL is not accessible: \n", url, "\n" + line)

    for i in mydata.index:
        srno = str(mydata['Sr No'][i])
        service = mydata['Service Name'][i]
        machine = mydata['Machine Name'][i]
        url = mydata['Node'][i]
        try:
            # Row values are passed as arguments: a closure over the loop
            # variables would read whatever row the loop has reached by the
            # time the thread gets around to using them.
            t = threading.Thread(target=get, args=(srno, service, machine, url))
            t.start()
            threads.append(t)
        except Exception as e:
            # Only covers failures to create/start the thread itself.
            print(line, "\n", e, "\n", srno, "| Below URL is not accessible: \n", url, "\n" + line)


validate()
# Wait for every worker before the script exits.
for thread in threads:
    thread.join()
写给需要解决方案的人:我在 @Yurii Kramarenko 的回答中找到了解决方案(基于 asyncio 和 aiohttp)。效果很好,现在我的脚本在 30 秒内就能运行完毕,而不是 10-11 分钟。
我的脚本-
def validate():
    """Validate every URL in the spreadsheet concurrently with asyncio/aiohttp.

    Reads the 'urls' column of 'Sheet1'.  When run as a script, every URL is
    requested with HTTP basic auth (SSL verification disabled) through one
    shared aiohttp session and the body is decoded as JSON.  If the 'Success'
    flag is falsy, each failing entry of 'Dependencies' that is not on the
    applicable ignore list is printed.  A URL whose check raises is reported
    as "not accessible" instead of being dropped silently.
    """
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']
    username = 'username'
    password = 'password'
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')
    urls = mydata['urls']
    # NOTE(review): the original body tested an undefined name `service`.
    # This assumes the sheet may carry a 'Service Name' column matching the
    # 'urls' rows - confirm against the workbook.  Without that column every
    # URL is checked against the `otherdown` ignore list.
    if 'Service Name' in mydata.columns:
        services = list(mydata['Service Name'])
    else:
        services = [None] * len(urls)
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)

    async def fetch(session, url, service=None):
        """Check one URL and print each failing dependency that is not ignored."""
        async with session.get(url, auth=aiohttp.BasicAuth(username, password), ssl=False) as response:
            data = await response.text()
        data = json.loads(data)
        if not data['Success']:
            ignored = paydown if service in alwaysdownservice else otherdown
            for j in data['Dependencies']:
                dependency = j['DependencyName']
                if not j['Success'] and dependency not in ignored:
                    print("Dependency -",
                          "\'" + dependency + "\'", "down on", "\nURL -", url, "\n" + line)
        print(url, "validated at:", datetime.datetime.now().strftime("%H:%M:%S"))

    async def fetch_all(urls, services):
        """Run fetch() for every URL at once and report the ones that raised."""
        async with aiohttp.ClientSession() as session:
            results = await asyncio.gather(
                *[fetch(session, url, service) for url, service in zip(urls, services)],
                return_exceptions=True)
        # return_exceptions=True hands exceptions back as results; surface
        # them here so a failed URL is not silently ignored.
        for url, result in zip(urls, results):
            if isinstance(result, BaseException):
                print(line, "\n", result, "\n", "| Below URL is not accessible: \n", url, "\n" + line)
        return results

    # Kept from the original: the requests are only issued when the file is
    # executed as a script, not when it is imported.
    if __name__ == '__main__':
        asyncio.run(fetch_all(urls, services))
    print("Validation completed for:",
          datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line, "\n" + line,)


validate()
我需要使用大约 500 多个 URL 来验证服务及其依赖关系,并且我已经有一个 python 代码可以执行此操作。 但问题是,其中一些 URL 每个都需要大约一分钟才能响应 [由于某些已知的依赖项已关闭]。 由于每个 URL 都托管在不同的服务器上,有没有办法使用 requests 模块同时访问多个 URL?
下面是我在 pycharm 中使用的全部代码:
import requests
import json
import pandas
import datetime
from requests.auth import HTTPBasicAuth
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
def validate():
    """Validate every service URL listed in the spreadsheet, one after another.

    Reads the 'Sr No', 'Service Name', 'Machine Name' and 'Node' columns of
    'Sheet1', requests each 'Node' URL with HTTP basic auth (certificate
    verification disabled) and decodes the body as JSON.  The JSON is expected
    to carry a 'Success' flag and a 'Dependencies' list whose items have
    'DependencyName' and 'Success' keys.

    A URL whose 'Success' flag is falsy is requested a second time; if it is
    still falsy, every failing dependency that is not on the applicable
    ignore list is printed.  Any exception raised while checking a URL is
    caught and reported as "not accessible" so the remaining rows still run.
    """
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)
    username = 'username'
    password = 'password'
    # Loop-invariant configuration, built once instead of once per row.
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')

    def fetch_status(url):
        """Request *url* and return its decoded JSON body."""
        response = requests.get(url, verify=False, auth=HTTPBasicAuth(username, password))
        return json.loads(response.text)

    def get(url, service, machine):
        """Check one URL and print each failing dependency that is not ignored."""
        data = fetch_status(url)
        if not data['Success']:
            # Ask a second time before reporting anything.
            data = fetch_status(url)
        if data['Success']:
            return
        # Services in alwaysdownservice use the paydown ignore list,
        # every other service uses otherdown.
        ignored = paydown if service in alwaysdownservice else otherdown
        for j in data['Dependencies']:
            dependency = j['DependencyName']
            if not j['Success'] and dependency not in ignored:
                print(dependency, "down on", machine, "for", service)

    for i in mydata.index:
        srno = str(mydata['Sr No'][i])
        service = mydata['Service Name'][i]
        machine = mydata['Machine Name'][i]
        url = mydata['Node'][i]
        try:
            get(url, service, machine)
        except Exception as e:
            # Best-effort run: report the failure and continue with the next row.
            print(line, "\n", e, "\n", srno, "| Below URL is not accessible: \n", url, "\n" + line)


validate()
您可以使用线程(Python 中的 threading 库)同时调用多个 URL。为此,您可以使用以下代码:
import requests
import json
import pandas
import datetime
from requests.auth import HTTPBasicAuth
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import threading
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Every worker thread started by validate(); joined at the bottom of the script.
threads = []


def validate():
    """Validate every service URL listed in the spreadsheet, one thread per row.

    Reads the 'Sr No', 'Service Name', 'Machine Name' and 'Node' columns of
    'Sheet1' and starts a thread for each row.  Each thread requests the
    'Node' URL with HTTP basic auth (certificate verification disabled),
    decodes the body as JSON and, if the 'Success' flag is falsy, requests
    the URL once more.  If it is still falsy, every failing entry of
    'Dependencies' that is not on the applicable ignore list is printed.

    Started threads are appended to the module-level ``threads`` list; this
    function does not wait for them.
    """
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)
    username = 'username'
    password = 'password'
    # Loop-invariant configuration, built once instead of once per row.
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')

    def fetch_status(url):
        """Request *url* and return its decoded JSON body."""
        response = requests.get(url, verify=False, auth=HTTPBasicAuth(username, password))
        return json.loads(response.text)

    def get(srno, service, machine, url):
        """Thread body: check one URL and report its failing dependencies."""
        # The try/except lives inside the worker because an exception raised
        # in a thread never reaches the code that started the thread.
        try:
            data = fetch_status(url)
            if not data['Success']:
                # Ask a second time before reporting anything.
                data = fetch_status(url)
            if data['Success']:
                return
            ignored = paydown if service in alwaysdownservice else otherdown
            for j in data['Dependencies']:
                dependency = j['DependencyName']
                if not j['Success'] and dependency not in ignored:
                    print(dependency, "down on", machine, "for", service)
        except Exception as e:
            print(line, "\n", e, "\n", srno, "| Below URL is not accessible: \n", url, "\n" + line)

    for i in mydata.index:
        srno = str(mydata['Sr No'][i])
        service = mydata['Service Name'][i]
        machine = mydata['Machine Name'][i]
        url = mydata['Node'][i]
        try:
            # Row values are passed as arguments: a closure over the loop
            # variables would read whatever row the loop has reached by the
            # time the thread gets around to using them.
            t = threading.Thread(target=get, args=(srno, service, machine, url))
            t.start()
            threads.append(t)
        except Exception as e:
            # Only covers failures to create/start the thread itself.
            print(line, "\n", e, "\n", srno, "| Below URL is not accessible: \n", url, "\n" + line)


validate()
# Wait for every worker before the script exits.
for thread in threads:
    thread.join()
写给需要解决方案的人:我在 @Yurii Kramarenko 的回答中找到了解决方案(基于 asyncio 和 aiohttp)。
我的脚本-
def validate():
    """Validate every URL in the spreadsheet concurrently with asyncio/aiohttp.

    Reads the 'urls' column of 'Sheet1'.  When run as a script, every URL is
    requested with HTTP basic auth (SSL verification disabled) through one
    shared aiohttp session and the body is decoded as JSON.  If the 'Success'
    flag is falsy, each failing entry of 'Dependencies' that is not on the
    applicable ignore list is printed.  A URL whose check raises is reported
    as "not accessible" instead of being dropped silently.
    """
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']
    username = 'username'
    password = 'password'
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')
    urls = mydata['urls']
    # NOTE(review): the original body tested an undefined name `service`.
    # This assumes the sheet may carry a 'Service Name' column matching the
    # 'urls' rows - confirm against the workbook.  Without that column every
    # URL is checked against the `otherdown` ignore list.
    if 'Service Name' in mydata.columns:
        services = list(mydata['Service Name'])
    else:
        services = [None] * len(urls)
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)

    async def fetch(session, url, service=None):
        """Check one URL and print each failing dependency that is not ignored."""
        async with session.get(url, auth=aiohttp.BasicAuth(username, password), ssl=False) as response:
            data = await response.text()
        data = json.loads(data)
        if not data['Success']:
            ignored = paydown if service in alwaysdownservice else otherdown
            for j in data['Dependencies']:
                dependency = j['DependencyName']
                if not j['Success'] and dependency not in ignored:
                    print("Dependency -",
                          "\'" + dependency + "\'", "down on", "\nURL -", url, "\n" + line)
        print(url, "validated at:", datetime.datetime.now().strftime("%H:%M:%S"))

    async def fetch_all(urls, services):
        """Run fetch() for every URL at once and report the ones that raised."""
        async with aiohttp.ClientSession() as session:
            results = await asyncio.gather(
                *[fetch(session, url, service) for url, service in zip(urls, services)],
                return_exceptions=True)
        # return_exceptions=True hands exceptions back as results; surface
        # them here so a failed URL is not silently ignored.
        for url, result in zip(urls, results):
            if isinstance(result, BaseException):
                print(line, "\n", result, "\n", "| Below URL is not accessible: \n", url, "\n" + line)
        return results

    # Kept from the original: the requests are only issued when the file is
    # executed as a script, not when it is imported.
    if __name__ == '__main__':
        asyncio.run(fetch_all(urls, services))
    print("Validation completed for:",
          datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line, "\n" + line,)


validate()