Dash 应用程序与 AWS postgres 数据库的连接非常慢
Dash app connections to AWS postgres DB VERY SLOW
我创建了一个连接到 public 面向 AWS Postgres 数据库的实时更新 dash 应用程序。我已将数据库连接放在我的回调中以便它更新,但我发现检索数据和创建图形需要很长时间,因此如果间隔时间减少到 10 秒或更短,则根本不会加载图形.我尝试将数据存储在 dcc.store 中,但初始加载仍然需要很长时间。我的缩写代码写在下面。我假设滞后时间来自连接到数据库的引擎,因为我只读取了几行和几列。有没有办法加快速度?
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from plotly.subplots import make_subplots
from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import declarative_base
from sqlalchemy import Column, Integer, String, func, Date, ARRAY
from sqlalchemy.orm import sessionmaker
app = dash.Dash(__name__, external_stylesheets=[BS], suppress_callback_exceptions=True, update_title=None)
server=app.server
app.layout = html.Div([
dcc.Store(id='time', storage_type='session'),
dcc.Store(id='blood_pressure', storage_type='session'),
html.Div(dcc.Graph(id='live-graph', animate=False), className='w-100'),
html.Div(id= "testing"),
dcc.Interval(
id='graph-update-BP',
interval=30000,
n_intervals=0
)]), width={"size": 10, "offset": 0.5}),
@app.callback(
dash.dependencies.Output('live-graph', 'figure'),
dash.dependencies.Output('blood_pressure', 'data'),
dash.dependencies.Output('time', 'data'),
[dash.dependencies.Input('graph-update-BP', 'n_intervals')],
Input('live-graph', 'relayoutData'),
)
def update_graph_scatter_1(n):
trace = []
blood_pressure = []
time = []
engine = create_engine("postgresql://username:password@address:5432/xxxxx", echo=True, future=True)
Session = sessionmaker(bind=engine)
session = Session()
Base = automap_base()
Base.prepare(engine, reflect=True)
User = Base.classes.users
Datex = Base.classes.data
for instance in session.query(Datex).filter(Datex.user_id == 3).filter(Datex.date_time == 'Monday,Apr:26'):
blood_pressure.append([instance.systolic, instance.mean, instance.diastolic])
time.append(instance.time)
for i in range(0, len(blood_pressure)):
trace.append(go.Box(y=blood_pressure[i],
x=time[i],
line=dict(color='#6a92ff'),
hoverinfo='all'))
fig = make_subplots(rows=1, cols=1)
def append_trace():
for i in range(0, len(trace)):
fig.append_trace(trace[i], 1, 1)
append_trace()
return fig, blood_pressure, hr,
您可以通过以下方式提高应用的性能:
非编程方法:
- 如果您的应用程序部署在 AWS 上,请确保您的应用程序通过私有 IP 连接到您的数据库。这减少了您的数据必须遍历的网络数量,并将显着降低延迟。
- 确保您的虚拟机有足够的 RAM。 (如果您将 2GB 的数据加载到具有 1GB 可用 RAM 的机器上,您将在加载到您的程序之前看到 IO 命中磁盘。)
编程方式:
- 模块化连接到您的数据库,并且只连接一次。这减少了保留资源和验证连接到数据库所需的开销
import os
class DbConnection:
"""Use this class to connect to your database within a dashapp"""
def __init__(self, **kwargs):
self.DB_URI = os.environ.get('DB_URI', kwargs.get('DB_URI'))
self.echo = kwargs.get('echo', True)
self.future = kwargs.get('future', True)
# Now create the engine
self.engine = create_engine(self.DB_URI, echo=self.echo, future=self.self)
# Make the session maker
self.session_maker = sessionmaker(bind=self.engine)
@property
def session(self):
"""Return a session as a property"""
return self.session_maker()
# -------------------------------------------
# In your app, instantiate the database connection
# and map your base
my_db_connection = DbConnection() # provide kwargs as needed
session = my_db_connection.session # necessary to assign property to a variable
# Map the classes
Base = automap_base()
Base.prepare(my_db_connection.engine, reflect=True)
User = Base.classes.users
Datex = Base.classes.data
- 缓存经常查询的数据。除非您的数据量很大且变化很大,否则您应该期望从计算机上的磁盘(或 RAM)加载数据比通过网络从数据库加载数据具有更好的性能。
from functools import lru_cache
@lru_cache()
def get_blood_pressure(session, user_id, date):
"""returns blood pressure for a given user for a given date"""
blood_pressure, time = [], []
query = session.query(Datex)\
.filter(Datex.user_id == 3)\
.filter(Datex.date_time == 'Monday,Apr:26')
# I like short variable names when interacting with db results
for rec in query:
time.append(rec.time)
blood_pressure.append([rec.systolic, rec.mean, rec.diastolic])
# finally
return blood_pressure, time
- 把它们放在一起,你的回调应该会快很多
def update_graph_scatter_1(n):
# I'm not sure how these variables will be assigned
# but you'll figure it out
blood_pressure, time = get_blood_pressure(session=session, user_id=user_id, date='Monday,Apr:26')
# Create new traces
for i in range(0, len(blood_pressure)):
trace.append(go.Box(
y=blood_pressure[i],
x=time[i],
line=dict(color='#6a92ff'),
hoverinfo='all'
))
# Add to subplots
fig = make_subplots(rows=1, cols=1)
for i in range(0, len(trace)):
fig.append_trace(trace[i], 1, 1)
return fig, blood_pressure, time
- 最后,您似乎在每次更新时都重新创建图形对象。这是一项繁重的手术。我建议改为更新图表的数据。我知道这是可能的,因为我过去曾这样做过。但不幸的是,看起来解决方案并不简单。也许是稍后回复或跟进问题的项目。
我创建了一个连接到 public 面向 AWS Postgres 数据库的实时更新 dash 应用程序。我已将数据库连接放在我的回调中以便它更新,但我发现检索数据和创建图形需要很长时间,因此如果间隔时间减少到 10 秒或更短,则根本不会加载图形.我尝试将数据存储在 dcc.store 中,但初始加载仍然需要很长时间。我的缩写代码写在下面。我假设滞后时间来自连接到数据库的引擎,因为我只读取了几行和几列。有没有办法加快速度?
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from plotly.subplots import make_subplots
from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import declarative_base
from sqlalchemy import Column, Integer, String, func, Date, ARRAY
from sqlalchemy.orm import sessionmaker
app = dash.Dash(__name__, external_stylesheets=[BS], suppress_callback_exceptions=True, update_title=None)
server=app.server
app.layout = html.Div([
dcc.Store(id='time', storage_type='session'),
dcc.Store(id='blood_pressure', storage_type='session'),
html.Div(dcc.Graph(id='live-graph', animate=False), className='w-100'),
html.Div(id= "testing"),
dcc.Interval(
id='graph-update-BP',
interval=30000,
n_intervals=0
)]), width={"size": 10, "offset": 0.5}),
@app.callback(
dash.dependencies.Output('live-graph', 'figure'),
dash.dependencies.Output('blood_pressure', 'data'),
dash.dependencies.Output('time', 'data'),
[dash.dependencies.Input('graph-update-BP', 'n_intervals')],
Input('live-graph', 'relayoutData'),
)
def update_graph_scatter_1(n):
trace = []
blood_pressure = []
time = []
engine = create_engine("postgresql://username:password@address:5432/xxxxx", echo=True, future=True)
Session = sessionmaker(bind=engine)
session = Session()
Base = automap_base()
Base.prepare(engine, reflect=True)
User = Base.classes.users
Datex = Base.classes.data
for instance in session.query(Datex).filter(Datex.user_id == 3).filter(Datex.date_time == 'Monday,Apr:26'):
blood_pressure.append([instance.systolic, instance.mean, instance.diastolic])
time.append(instance.time)
for i in range(0, len(blood_pressure)):
trace.append(go.Box(y=blood_pressure[i],
x=time[i],
line=dict(color='#6a92ff'),
hoverinfo='all'))
fig = make_subplots(rows=1, cols=1)
def append_trace():
for i in range(0, len(trace)):
fig.append_trace(trace[i], 1, 1)
append_trace()
return fig, blood_pressure, hr,
您可以通过以下方式提高应用的性能:
非编程方法:
- 如果您的应用程序部署在 AWS 上,请确保您的应用程序通过私有 IP 连接到您的数据库。这减少了您的数据必须遍历的网络数量,并将显着降低延迟。
- 确保您的虚拟机有足够的 RAM。 (如果您将 2GB 的数据加载到具有 1GB 可用 RAM 的机器上,您将在加载到您的程序之前看到 IO 命中磁盘。)
编程方式:
- 模块化连接到您的数据库,并且只连接一次。这减少了保留资源和验证连接到数据库所需的开销
import os
class DbConnection:
"""Use this class to connect to your database within a dashapp"""
def __init__(self, **kwargs):
self.DB_URI = os.environ.get('DB_URI', kwargs.get('DB_URI'))
self.echo = kwargs.get('echo', True)
self.future = kwargs.get('future', True)
# Now create the engine
self.engine = create_engine(self.DB_URI, echo=self.echo, future=self.self)
# Make the session maker
self.session_maker = sessionmaker(bind=self.engine)
@property
def session(self):
"""Return a session as a property"""
return self.session_maker()
# -------------------------------------------
# In your app, instantiate the database connection
# and map your base
my_db_connection = DbConnection() # provide kwargs as needed
session = my_db_connection.session # necessary to assign property to a variable
# Map the classes
Base = automap_base()
Base.prepare(my_db_connection.engine, reflect=True)
User = Base.classes.users
Datex = Base.classes.data
- 缓存经常查询的数据。除非您的数据量很大且变化很大,否则您应该期望从计算机上的磁盘(或 RAM)加载数据比通过网络从数据库加载数据具有更好的性能。
from functools import lru_cache
@lru_cache()
def get_blood_pressure(session, user_id, date):
"""returns blood pressure for a given user for a given date"""
blood_pressure, time = [], []
query = session.query(Datex)\
.filter(Datex.user_id == 3)\
.filter(Datex.date_time == 'Monday,Apr:26')
# I like short variable names when interacting with db results
for rec in query:
time.append(rec.time)
blood_pressure.append([rec.systolic, rec.mean, rec.diastolic])
# finally
return blood_pressure, time
- 把它们放在一起,你的回调应该会快很多
def update_graph_scatter_1(n):
# I'm not sure how these variables will be assigned
# but you'll figure it out
blood_pressure, time = get_blood_pressure(session=session, user_id=user_id, date='Monday,Apr:26')
# Create new traces
for i in range(0, len(blood_pressure)):
trace.append(go.Box(
y=blood_pressure[i],
x=time[i],
line=dict(color='#6a92ff'),
hoverinfo='all'
))
# Add to subplots
fig = make_subplots(rows=1, cols=1)
for i in range(0, len(trace)):
fig.append_trace(trace[i], 1, 1)
return fig, blood_pressure, time
- 最后,您似乎在每次更新时都重新创建图形对象。这是一项繁重的手术。我建议改为更新图表的数据。我知道这是可能的,因为我过去曾这样做过。但不幸的是,看起来解决方案并不简单。也许是稍后回复或跟进问题的项目。