Postgres PL/pgSQL 合并存在于各种表中的列
Postgres PL/pgSQL To Consolidate Columns Existing Across Various Tables
我正在实施一个工具来清理名为 stage
的架构中各个表中的所有客户名称。客户名称可能来自 billing_acc_name
或 cust_acc_name
列。我事先不知道有多少表有这些列,但只要有,它们就会成为清理的一部分。
但是,在清理之前,我需要 select 架构中所有表的所有唯一客户名称。
为了更好地分离关注点,我正在考虑在 PL/pgSQL 中实现它。目前,这就是我在 Python/pandas/SQLAlchemy 等
中实现的方式
table_name = 'information_schema.columns'
table_schema_src = 'stage'
cols = ['billing_acc_name', 'cust_acc_name']

# Get every (table, column) pair in the source schema whose column is one of
# the customer-name columns. The schema name and the column list are sent as
# bound parameters rather than formatted into the SQL text.
# NOTE(review): ANY(:cols) assumes the DB driver adapts a Python list to a
# Postgres array (psycopg2 does) -- confirm for the driver behind `engine`.
sql = text(f"""
    SELECT table_name, column_name
    FROM {table_name}
    WHERE table_schema = :schema
    AND column_name = ANY(:cols)
""")
src = pd.read_sql(sql, con=engine, params={'schema': table_schema_src, 'cols': cols})

# explore implementation in pgsql
# Build one SELECT per (table, column) pair. The identifiers come from the
# catalog query above, not from user input; they are interpolated unquoted, so
# names that need quoting (mixed case, spaces) would have to be wrapped in
# double quotes first.
cnames = [
    f"SELECT DISTINCT upper({row['column_name']}) AS cname "
    f"FROM {table_schema_src}.{row['table_name']}"
    for _, row in src.iterrows()
]

if cnames:
    # UNION (not UNION ALL) also removes duplicates across tables.
    sql = ' UNION '.join(cnames)
    df = pd.read_sql(sql, con=engine)
else:
    # No table in the schema has any of the columns: nothing to query.
    df = pd.DataFrame(columns=['cname'])
自动生成的SQL查询字符串如下:
-- Statement assembled by the script: one SELECT per matching (table, column)
-- pair, joined with UNION, so each upper-cased name appears once in the result.
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyA UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyA UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyB UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyB UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyC UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyC UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyD UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyD
plpgsql 函数可能如下所示:
-- Returns the distinct upper-cased values of every cust_acc_name /
-- billing_acc_name column found in the given schema.
--
-- _schema: name of the schema whose tables are scanned.
-- Returns: setof text, one row per distinct value (a NULL source value
--          contributes a single NULL row); no rows at all when no table in
--          the schema has either column.
create or replace function select_acc_names(_schema text)
returns setof text language plpgsql as $$
declare
    branches text;
begin
    -- One "select upper(col) from schema.table" per matching column, glued
    -- into a single statement so duplicates can be removed across tables
    -- (a per-table RETURN QUERY cannot do that). %I quotes identifiers.
    select string_agg(
               format('select upper(%I) as cname from %I.%I',
                      col.column_name, _schema, col.table_name),
               ' union all ')
    into branches
    from information_schema.columns as col
    where col.table_schema = _schema
    and col.column_name = any(array['cust_acc_name', 'billing_acc_name']);

    -- string_agg over zero rows yields NULL: nothing to execute.
    if branches is null then
        return;
    end if;

    return query
    execute 'select distinct names.cname from (' || branches || ') as names';
end $$;
使用:
-- Fetch the customer names collected from the "stage" schema; the result
-- column of a scalar set-returning function is named after the function.
select select_acc_names
from select_acc_names('stage');
我正在实施一个工具来清理名为 stage
的架构中各个表中的所有客户名称。客户名称可能来自 billing_acc_name
或 cust_acc_name
列。我事先不知道有多少表有这些列,但只要有,它们就会成为清理的一部分。
但是,在清理之前,我需要 select 架构中所有表的所有唯一客户名称。
为了更好地分离关注点,我正在考虑在 PL/pgSQL 中实现它。目前,这就是我在 Python/pandas/SQLAlchemy 等
中实现的方式:
table_name = 'information_schema.columns'
table_schema_src = 'stage'
cols = ['billing_acc_name', 'cust_acc_name']

# Get every (table, column) pair in the source schema whose column is one of
# the customer-name columns. The schema name and the column list are sent as
# bound parameters rather than formatted into the SQL text.
# NOTE(review): ANY(:cols) assumes the DB driver adapts a Python list to a
# Postgres array (psycopg2 does) -- confirm for the driver behind `engine`.
sql = text(f"""
    SELECT table_name, column_name
    FROM {table_name}
    WHERE table_schema = :schema
    AND column_name = ANY(:cols)
""")
src = pd.read_sql(sql, con=engine, params={'schema': table_schema_src, 'cols': cols})

# explore implementation in pgsql
# Build one SELECT per (table, column) pair. The identifiers come from the
# catalog query above, not from user input; they are interpolated unquoted, so
# names that need quoting (mixed case, spaces) would have to be wrapped in
# double quotes first.
cnames = [
    f"SELECT DISTINCT upper({row['column_name']}) AS cname "
    f"FROM {table_schema_src}.{row['table_name']}"
    for _, row in src.iterrows()
]

if cnames:
    # UNION (not UNION ALL) also removes duplicates across tables.
    sql = ' UNION '.join(cnames)
    df = pd.read_sql(sql, con=engine)
else:
    # No table in the schema has any of the columns: nothing to query.
    df = pd.DataFrame(columns=['cname'])
自动生成的SQL查询字符串如下:
-- Statement assembled by the script: one SELECT per matching (table, column)
-- pair, joined with UNION, so each upper-cased name appears once in the result.
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyA UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyA UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyB UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyB UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyC UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyC UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyD UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyD
plpgsql 函数可能如下所示:
-- Returns the distinct upper-cased values of every cust_acc_name /
-- billing_acc_name column found in the given schema.
--
-- _schema: name of the schema whose tables are scanned.
-- Returns: setof text, one row per distinct value (a NULL source value
--          contributes a single NULL row); no rows at all when no table in
--          the schema has either column.
create or replace function select_acc_names(_schema text)
returns setof text language plpgsql as $$
declare
    branches text;
begin
    -- One "select upper(col) from schema.table" per matching column, glued
    -- into a single statement so duplicates can be removed across tables
    -- (a per-table RETURN QUERY cannot do that). %I quotes identifiers.
    select string_agg(
               format('select upper(%I) as cname from %I.%I',
                      col.column_name, _schema, col.table_name),
               ' union all ')
    into branches
    from information_schema.columns as col
    where col.table_schema = _schema
    and col.column_name = any(array['cust_acc_name', 'billing_acc_name']);

    -- string_agg over zero rows yields NULL: nothing to execute.
    if branches is null then
        return;
    end if;

    return query
    execute 'select distinct names.cname from (' || branches || ') as names';
end $$;
使用:
-- Fetch the customer names collected from the "stage" schema; the result
-- column of a scalar set-returning function is named after the function.
select select_acc_names
from select_acc_names('stage');