Python - 遍历具有不同变量的相同查询,合并数据帧
Python - loop through same query with different variables, merge data frames
我在 SAS 中有一个查询,其中使用宏变量以不同的变量对 Teradata 重复执行同一查询。我们有 5 个数据库,每个州(state)一个;我在每个数据库中运行相同的查询,只是用变量替换州名,然后把所有结果数据集追加合并在一起。我正在寻求有关如何在 Python 中执行此操作的帮助。
遍历 {state1, state2, state3, state4, state5},将每次查询的结果保存为 {stateX}_df,然后合并所有结果。
import teradata as td
import pandas as pd
from teradata import tdodbc
# Question snippet: runs the query for ONE state. The goal of the question is
# to repeat this for state1..state5 and combine the per-state results.
udaExec = td.UdaExec(appConfigFile="udaexec.ini")

# SQL templates; "{state1}" is the placeholder that must be filled in per state.
query1 = """database my_db_{state1};"""
query2 = """
select distinct
{state1}, item_a, item_b
from table
"""

# Example value; in a full solution this would come from a loop over the states.
state = "state1"

with udaExec.connect("${dataSourceName}", LoginTimeout=120) as session:
    # Switch the session's default database so the unqualified "table"
    # resolves inside this state's database.
    session.execute(query1.format(state1=state))
    # read_sql runs the query itself, so a separate session.execute(query2)
    # beforehand would only execute the same select twice.
    state_df = pd.read_sql(query2.format(state1=state), session)
不确定您使用的是 python 2 还是 python 3。如果您可以使用 python 3.6 或更高版本,也许像下面这样的东西可以工作?
import teradata as td
import pandas as pd
# Query every per-state database (my_db_<state>) and stack the results into
# one DataFrame.
STATES = ["state1", "state2", "state3", "state4", "state5"]

udaExec = td.UdaExec(appName="test", version="1.0", logConsole=False)

# NOTE: the connection values below are placeholders from the example; supply
# real settings from configuration rather than hard-coding credentials.
with udaExec.connect(
    method="odbc",
    system="host",
    username="username",
    password="password",
    driver="drivername",
) as conn:
    state_dataframes = []
    for state in STATES:
        # The state name is used both as the selected column and as the
        # database suffix. f-strings require Python 3.6+.
        sql = f"select distinct {state}, item_a, item_b from my_db_{state}.table;"
        state_dataframes.append(pd.read_sql(sql, conn))

# The connection is no longer needed once every frame has been fetched.
combined_data = pd.concat(state_dataframes)
这还没有经过测试,但希望它能让您朝着正确的方向前进。
我能够在单个测试查询上使用它,这真的很有帮助,谢谢@andrew madsen
我还没有解决的是如何在需要执行多个查询的情况下做到这一点。我一直在阅读有关游标和连接的资料,我认为这能帮我解决问题。
import teradata as td
import pandas as pd
from teradata import tdodbc
# Pull a 10-row sample from each state's database, tag every row with its
# state code, and stack the samples into one DataFrame.
udaExec = td.UdaExec(appConfigFile="udaexec.ini")

STATES = ["IL", "TX", "MT", "OK", "NM"]

# Loop-invariant template, built once. First placeholder: the state code
# emitted as a literal "state" column; second: the database name suffix.
STATE_SAMPLE_SQL = """
select top 10
'{}' as state
,a.*
from my_db_{}.table a
"""

with udaExec.connect("${dataSourceName}") as session:
    state_dataframes = []
    for state in STATES:
        sql = STATE_SAMPLE_SQL.format(state, state)
        state_dataframes.append(pd.read_sql(sql, session))

all_states_df = pd.concat(state_dataframes)
这是一个改进的版本,使用了 volatile table(易失表):
# For each state: switch to that state's database, stage the data through two
# volatile tables, read the final table into a DataFrame, then stack all
# per-state frames together.
udaExec = td.UdaExec(appConfigFile="udaexec.ini")

STATES = ["state1", "state2", "state3", "state4", "state5"]

# The SQL templates do not change between iterations, so they are defined once
# here instead of being rebuilt inside the loop.

# Placeholder: database name suffix (the state).
query1 = """database my_db_{};"""

# Placeholder: the state, emitted as a literal "state" column.
query2 = """
create set volatile table v_table
,no fallback, no before journal, no after journal as
(
select top 10
'{}' as state
,t.*
from table t
)
with data
primary index (dw_key)
on commit preserve rows;
"""

query3 = """
create set volatile table v_table_2
,no fallback, no before journal, no after journal as
(
select t.*
from v_table t
)
with data
primary index (dw_clm_key)
on commit preserve rows;
"""

query4 = """
select t.*
from v_table_2 t
"""

with udaExec.connect("${dataSourceName}") as session:
    state_dataframes = []
    for state in STATES:
        session.execute(query1.format(state))
        session.execute(query2.format(state))
        session.execute(query3)
        # read_sql executes query4 itself; running it through
        # session.execute first would only perform the select twice.
        state_dataframes.append(pd.read_sql(query4, session))
        # The same table names are created again on the next iteration in
        # this same session, so they have to be dropped here.
        session.execute("DROP TABLE v_table")
        session.execute("DROP TABLE v_table_2")

all_states_df = pd.concat(state_dataframes)
我在 SAS 中有一个查询,其中使用宏变量以不同的变量对 Teradata 重复执行同一查询。我们有 5 个数据库,每个州(state)一个;我在每个数据库中运行相同的查询,只是用变量替换州名,然后把所有结果数据集追加合并在一起。我正在寻求有关如何在 Python 中执行此操作的帮助。
遍历 {state1, state2, state3, state4, state5},将每次查询的结果保存为 {stateX}_df,然后合并所有结果。
import teradata as td
import pandas as pd
from teradata import tdodbc
# Question snippet: runs the query for ONE state. The goal of the question is
# to repeat this for state1..state5 and combine the per-state results.
udaExec = td.UdaExec(appConfigFile="udaexec.ini")

# SQL templates; "{state1}" is the placeholder that must be filled in per state.
query1 = """database my_db_{state1};"""
query2 = """
select distinct
{state1}, item_a, item_b
from table
"""

# Example value; in a full solution this would come from a loop over the states.
state = "state1"

with udaExec.connect("${dataSourceName}", LoginTimeout=120) as session:
    # Switch the session's default database so the unqualified "table"
    # resolves inside this state's database.
    session.execute(query1.format(state1=state))
    # read_sql runs the query itself, so a separate session.execute(query2)
    # beforehand would only execute the same select twice.
    state_df = pd.read_sql(query2.format(state1=state), session)
不确定您使用的是 python 2 还是 python 3。如果您可以使用 python 3.6 或更高版本,也许像下面这样的东西可以工作?
import teradata as td
import pandas as pd
# Query every per-state database (my_db_<state>) and stack the results into
# one DataFrame.
STATES = ["state1", "state2", "state3", "state4", "state5"]

udaExec = td.UdaExec(appName="test", version="1.0", logConsole=False)

# NOTE: the connection values below are placeholders from the example; supply
# real settings from configuration rather than hard-coding credentials.
with udaExec.connect(
    method="odbc",
    system="host",
    username="username",
    password="password",
    driver="drivername",
) as conn:
    state_dataframes = []
    for state in STATES:
        # The state name is used both as the selected column and as the
        # database suffix. f-strings require Python 3.6+.
        sql = f"select distinct {state}, item_a, item_b from my_db_{state}.table;"
        state_dataframes.append(pd.read_sql(sql, conn))

# The connection is no longer needed once every frame has been fetched.
combined_data = pd.concat(state_dataframes)
这还没有经过测试,但希望它能让您朝着正确的方向前进。
我能够在单个测试查询上使用它,这真的很有帮助,谢谢@andrew madsen
我还没有解决的是如何在需要执行多个查询的情况下做到这一点。我一直在阅读有关游标和连接的资料,我认为这能帮我解决问题。
import teradata as td
import pandas as pd
from teradata import tdodbc
# Pull a 10-row sample from each state's database, tag every row with its
# state code, and stack the samples into one DataFrame.
udaExec = td.UdaExec(appConfigFile="udaexec.ini")

STATES = ["IL", "TX", "MT", "OK", "NM"]

# Loop-invariant template, built once. First placeholder: the state code
# emitted as a literal "state" column; second: the database name suffix.
STATE_SAMPLE_SQL = """
select top 10
'{}' as state
,a.*
from my_db_{}.table a
"""

with udaExec.connect("${dataSourceName}") as session:
    state_dataframes = []
    for state in STATES:
        sql = STATE_SAMPLE_SQL.format(state, state)
        state_dataframes.append(pd.read_sql(sql, session))

all_states_df = pd.concat(state_dataframes)
这是一个改进的版本,使用了 volatile table(易失表):
# For each state: switch to that state's database, stage the data through two
# volatile tables, read the final table into a DataFrame, then stack all
# per-state frames together.
udaExec = td.UdaExec(appConfigFile="udaexec.ini")

STATES = ["state1", "state2", "state3", "state4", "state5"]

# The SQL templates do not change between iterations, so they are defined once
# here instead of being rebuilt inside the loop.

# Placeholder: database name suffix (the state).
query1 = """database my_db_{};"""

# Placeholder: the state, emitted as a literal "state" column.
query2 = """
create set volatile table v_table
,no fallback, no before journal, no after journal as
(
select top 10
'{}' as state
,t.*
from table t
)
with data
primary index (dw_key)
on commit preserve rows;
"""

query3 = """
create set volatile table v_table_2
,no fallback, no before journal, no after journal as
(
select t.*
from v_table t
)
with data
primary index (dw_clm_key)
on commit preserve rows;
"""

query4 = """
select t.*
from v_table_2 t
"""

with udaExec.connect("${dataSourceName}") as session:
    state_dataframes = []
    for state in STATES:
        session.execute(query1.format(state))
        session.execute(query2.format(state))
        session.execute(query3)
        # read_sql executes query4 itself; running it through
        # session.execute first would only perform the select twice.
        state_dataframes.append(pd.read_sql(query4, session))
        # The same table names are created again on the next iteration in
        # this same session, so they have to be dropped here.
        session.execute("DROP TABLE v_table")
        session.execute("DROP TABLE v_table_2")

all_states_df = pd.concat(state_dataframes)