不同列名的 WHERE 子句
WHERE clause for different column names
下面的脚本反映了我更新、编辑的尝试(遵循下面的建议)在操作数据库中使用来自 tables 的行填充维度 tables,前提是来自 PANDAS DataFrame,通过连接来自 OPDB 中相关 table 的 ID 列创建,不存在于维度表中。
import mysql.connector
import pandas as pd
...
op_cursor = op_connector.cursor
dwh_cursor = dwh_connector.cursor
...
class dimension_table:
def __init__(self, dwh_cols, op_cols, dim_id, dwh_table_name, op_table_name,op_args=None, dwh_args=None):
self.dwh_cols = ('')
self.op_cols = ('')
self.dim_id = dim_id
self.dwh_table_name = dwh_table_name
self.op_table_name = '`*opdb.*`.' + op_table_name
self.op_args = ",".join(op_cols)
self.dwh_args = ",".join(dwh_cols)
...
billing_address_data = dimension_table(("id","address", "alias", "postal_code", "type", "city", "country",
"geolocation"),
("id","address", "alias", "postal_code", "type", "city", "country",
"geolocation"),
billing_address_dim_id,'billing_address_dim', 'billing_address')
...
def load_dim(instance):
sql = """INSERT INTO {dwh} ({dwh_cols})
SELECT {op_cols}
FROM {op}
WHERE {pk} NOT IN
(SELECT {pk} FROM {dwh} WHERE id = %s)
LIMIT 1
"""
for key in instance.dim_id:
try:
# ID APPEND
dwh_cursor.execute(sql.format(dwh = instance.dwh_table_name,
dwh_cols = instance.dwh_args,
op_cols = instance.op_args,
op = instance.op_table_name,
pk = 'id'),
str(key))
dwh_connector.commit()
except mysql.connector.ProgrammingError as err:
# ORDER_ID APPEND
dwh_cursor.execute(sql.format(dwh = instance.dwh_table_name,
dwh_cols = instance.dwh_args,
op_cols = instance.op_args,
op = instance.op_table_name,
pk = 'order_id'),
str(key))
dwh_connector.commit()
billing_profile_op_id = dwh_cursor.lastrowid
...
load_dim(order_items_data)
我最近遇到的问题是 运行 脚本最后一行代码 load_dim(order_items_data)
导致的错误。
是 order_items table 与 order_id PK.
ProgrammingError: 1054 (42S22): Unknown column 'id' in 'where clause'
考虑 try/except
并通过使用带有 IN
子句的纯插入-select SQL 查询来避免所有查询构建和 fetch
检查,因为此反映了非重复追加查询的需要。参见 NOT IN vs. NOT EXISTS vs. LEFT JOIN / IS NULL。
下面使用LIMIT 1
代替fetchone()
,否则根据RDBMS使用TOP 1
或fetch first 1 rows only
。另外,参数占位符使用%s
,否则根据Python DB-API使用?
。在以后的帖子中,始终标记 RDBMS 并使用 import
行显示 DB-API。
def load_dim(instance):
sql = """INSERT INTO {dwh} ({dwh_cols})
SELECT {op_cols}
FROM {op}
WHERE {pk} NOT IN
(SELECT {pk} FROM {dwh} WHERE {pk} = %s)
LIMIT 1
"""
for key in instance.dim_id:
try:
# ID APPEND
dwh_cursor.execute(sql.format(dwh = instance.dwh_table_name,
dwh_cols = instance.dwh_args,
op_cols = instance.op_args,
op = instance.op_table_name,
pk = 'id'),
(str(key),))
dwh_connector.commit()
except Exception as e: # ADJUST TO DB-API SPECIFIC Error
# ORDER_ID APPEND
dwh_cursor.execute(sql.format(dwh = instance.dwh_table_name,
dwh_cols = instance.dwh_args,
op_cols = instance.op_args,
op = instance.op_table_name,
pk = 'order_id'),
(str(key),))
dwh_connector.commit()
billing_profile_op_id = dwh_cursor.lastrowid # RETURNS 0 IF NO DATA APPENDED
下面的脚本反映了我更新、编辑的尝试(遵循下面的建议)在操作数据库中使用来自 tables 的行填充维度 tables,前提是来自 PANDAS DataFrame,通过连接来自 OPDB 中相关 table 的 ID 列创建,不存在于维度表中。
import mysql.connector
import pandas as pd
...
op_cursor = op_connector.cursor
dwh_cursor = dwh_connector.cursor
...
class dimension_table:
def __init__(self, dwh_cols, op_cols, dim_id, dwh_table_name, op_table_name,op_args=None, dwh_args=None):
self.dwh_cols = ('')
self.op_cols = ('')
self.dim_id = dim_id
self.dwh_table_name = dwh_table_name
self.op_table_name = '`*opdb.*`.' + op_table_name
self.op_args = ",".join(op_cols)
self.dwh_args = ",".join(dwh_cols)
...
billing_address_data = dimension_table(("id","address", "alias", "postal_code", "type", "city", "country",
"geolocation"),
("id","address", "alias", "postal_code", "type", "city", "country",
"geolocation"),
billing_address_dim_id,'billing_address_dim', 'billing_address')
...
def load_dim(instance):
sql = """INSERT INTO {dwh} ({dwh_cols})
SELECT {op_cols}
FROM {op}
WHERE {pk} NOT IN
(SELECT {pk} FROM {dwh} WHERE id = %s)
LIMIT 1
"""
for key in instance.dim_id:
try:
# ID APPEND
dwh_cursor.execute(sql.format(dwh = instance.dwh_table_name,
dwh_cols = instance.dwh_args,
op_cols = instance.op_args,
op = instance.op_table_name,
pk = 'id'),
str(key))
dwh_connector.commit()
except mysql.connector.ProgrammingError as err:
# ORDER_ID APPEND
dwh_cursor.execute(sql.format(dwh = instance.dwh_table_name,
dwh_cols = instance.dwh_args,
op_cols = instance.op_args,
op = instance.op_table_name,
pk = 'order_id'),
str(key))
dwh_connector.commit()
billing_profile_op_id = dwh_cursor.lastrowid
...
load_dim(order_items_data)
我最近遇到的问题是 运行 脚本最后一行代码 load_dim(order_items_data)
导致的错误。
是 order_items table 与 order_id PK.
ProgrammingError: 1054 (42S22): Unknown column 'id' in 'where clause'
考虑 try/except
并通过使用带有 IN
子句的纯插入-select SQL 查询来避免所有查询构建和 fetch
检查,因为此反映了非重复追加查询的需要。参见 NOT IN vs. NOT EXISTS vs. LEFT JOIN / IS NULL。
下面使用LIMIT 1
代替fetchone()
,否则根据RDBMS使用TOP 1
或fetch first 1 rows only
。另外,参数占位符使用%s
,否则根据Python DB-API使用?
。在以后的帖子中,始终标记 RDBMS 并使用 import
行显示 DB-API。
def load_dim(instance):
sql = """INSERT INTO {dwh} ({dwh_cols})
SELECT {op_cols}
FROM {op}
WHERE {pk} NOT IN
(SELECT {pk} FROM {dwh} WHERE {pk} = %s)
LIMIT 1
"""
for key in instance.dim_id:
try:
# ID APPEND
dwh_cursor.execute(sql.format(dwh = instance.dwh_table_name,
dwh_cols = instance.dwh_args,
op_cols = instance.op_args,
op = instance.op_table_name,
pk = 'id'),
(str(key),))
dwh_connector.commit()
except Exception as e: # ADJUST TO DB-API SPECIFIC Error
# ORDER_ID APPEND
dwh_cursor.execute(sql.format(dwh = instance.dwh_table_name,
dwh_cols = instance.dwh_args,
op_cols = instance.op_args,
op = instance.op_table_name,
pk = 'order_id'),
(str(key),))
dwh_connector.commit()
billing_profile_op_id = dwh_cursor.lastrowid # RETURNS 0 IF NO DATA APPENDED