不同列名的 WHERE 子句

WHERE clause for different column names

下面的脚本是我根据下方建议更新、编辑后的尝试:用操作数据库(OPDB)中各表的行来填充数据仓库的维度表,前提是这些行的 ID 尚不存在于维度表中。这些 ID 来自一个 pandas DataFrame,该 DataFrame 由 OPDB 中相关表的 ID 列连接而成。

import mysql.connector
import pandas as pd

        ...

# cursor() has to be called: without the parentheses these names would hold
# the bound method itself, and the later dwh_cursor.execute(...) would fail.
op_cursor = op_connector.cursor()
dwh_cursor = dwh_connector.cursor()

        ...

class dimension_table:
    """Describe how one warehouse dimension table is fed from an operational table.

    Attributes set by ``__init__``:
        dwh_cols, op_cols: empty-string placeholders (the column sequences
            passed in are NOT stored here; see ``dwh_args`` / ``op_args``).
        dim_id: the ``dim_id`` argument, stored unchanged.
        dwh_table_name: the ``dwh_table_name`` argument, stored unchanged.
        op_table_name: ``op_table_name`` prefixed with the literal
            ``'`*opdb.*`.'`` qualifier.
        op_args, dwh_args: the column names of ``op_cols`` / ``dwh_cols``
            joined with commas, ready to be placed in a SQL column list.
    """

    def __init__(self, dwh_cols, op_cols, dim_id, dwh_table_name, op_table_name,
                 op_args=None, dwh_args=None):
        """Build the comma-separated column lists and qualified table name.

        Args:
            dwh_cols: iterable of column-name strings for the dimension table.
            op_cols: iterable of column-name strings for the operational table.
            dim_id: stored as-is on the instance.
            dwh_table_name: name of the dimension table.
            op_table_name: unqualified name of the operational table.
            op_args: accepted but ignored; ``self.op_args`` is always derived
                from ``op_cols``.
            dwh_args: accepted but ignored; ``self.dwh_args`` is always
                derived from ``dwh_cols``.
        """
        # Placeholders only: the usable column lists live in *_args below.
        self.dwh_cols = ''
        self.op_cols = ''
        self.dim_id = dim_id
        self.dwh_table_name = dwh_table_name
        self.op_table_name = '`*opdb.*`.' + op_table_name
        self.op_args = ",".join(op_cols)
        self.dwh_args = ",".join(dwh_cols)

        ...


# Dimension-table description for billing addresses: both sides use the same
# column names, passed by keyword so each argument's role is explicit.
billing_address_data = dimension_table(
    dwh_cols=("id", "address", "alias", "postal_code", "type", "city",
              "country", "geolocation"),
    op_cols=("id", "address", "alias", "postal_code", "type", "city",
             "country", "geolocation"),
    dim_id=billing_address_dim_id,
    dwh_table_name='billing_address_dim',
    op_table_name='billing_address',
)

        ...

def load_dim(instance):
    """Append to a dimension table the operational rows whose keys it lacks.

    For every key in ``instance.dim_id`` one ``INSERT ... SELECT`` copies the
    operational row carrying that key into ``instance.dwh_table_name``,
    unless the dimension table already holds a row with the same key. The
    key column is tried as ``id`` first and as ``order_id`` when MySQL
    rejects the first statement.

    Args:
        instance: object exposing ``dim_id``, ``dwh_table_name``,
            ``op_table_name``, ``dwh_args`` and ``op_args``.

    Raises:
        mysql.connector.ProgrammingError: if the statement is rejected for
            both candidate key columns.
    """
    # Identifiers cannot be bound as parameters, so they are formatted into
    # the statement; they must come from trusted code, never from user input.
    # The key column is a placeholder everywhere (a hard-coded ``id`` in the
    # subquery breaks tables keyed by ``order_id``), and the outer WHERE
    # pins the row to the requested key so LIMIT 1 cannot pick another row.
    sql = """INSERT INTO {dwh} ({dwh_cols})
             SELECT {op_cols}
             FROM {op}
             WHERE {pk} = %s
               AND {pk} NOT IN
                (SELECT {pk} FROM {dwh} WHERE {pk} = %s)
             LIMIT 1
          """
    pk_candidates = ('id', 'order_id')

    for key in instance.dim_id:
        # execute() needs a sequence of parameters, one per %s placeholder.
        params = (str(key), str(key))

        for pk in pk_candidates:
            statement = sql.format(dwh=instance.dwh_table_name,
                                   dwh_cols=instance.dwh_args,
                                   op_cols=instance.op_args,
                                   op=instance.op_table_name,
                                   pk=pk)
            try:
                dwh_cursor.execute(statement, params)
            except mysql.connector.ProgrammingError:
                # e.g. 1054 "Unknown column": try the next key column, or
                # give up once every candidate has been rejected.
                if pk == pk_candidates[-1]:
                    raise
                continue
            dwh_connector.commit()
            break

        # NOTE(review): not used in the visible part of this function.
        billing_profile_op_id = dwh_cursor.lastrowid

      ...

load_dim(order_items_data)

我最近遇到的问题是:运行脚本最后一行代码 load_dim(order_items_data) 时出现了下面的错误。order_items 表的主键(PK)是 order_id。

ProgrammingError: 1054 (42S22): Unknown column 'id' in 'where clause'

建议使用 try/except,并改用带 NOT IN 子句的纯 INSERT ... SELECT 查询,从而省去所有查询拼接和 fetch 检查,因为这里需要的正是一个不产生重复行的追加查询。参见 NOT IN vs. NOT EXISTS vs. LEFT JOIN / IS NULL。

下面使用 LIMIT 1 代替 fetchone();在其他 RDBMS 中则应使用 TOP 1 或 FETCH FIRST 1 ROWS ONLY。另外,参数占位符使用 %s;其他 Python DB-API 驱动可能使用 ?。在以后的帖子中,请始终标明所用的 RDBMS,并附上 import 行以表明所用的 DB-API。

def load_dim(instance):
    """Insert into a dimension table each listed key that is not there yet.

    Iterates over ``instance.dim_id``; for each key a single
    ``INSERT ... SELECT`` copies the matching row from
    ``instance.op_table_name`` into ``instance.dwh_table_name`` when the
    dimension table has no row with that key. ``id`` is tried as the key
    column first, then ``order_id``.

    Args:
        instance: object exposing ``dim_id``, ``dwh_table_name``,
            ``op_table_name``, ``dwh_args`` and ``op_args``.

    Raises:
        mysql.connector.ProgrammingError: the error of the last attempt, if
            MySQL rejects the statement for every candidate key column.
    """
    # Table and column names are formatted in because identifiers cannot be
    # bound parameters; they must originate from trusted code. The outer
    # ``{pk} = %s`` restricts the copy to the requested key; without it
    # LIMIT 1 would insert an arbitrary row that merely differs from the key.
    sql = """INSERT INTO {dwh} ({dwh_cols})
             SELECT {op_cols}
             FROM {op}
             WHERE {pk} = %s
               AND {pk} NOT IN
                (SELECT {pk} FROM {dwh} WHERE {pk} = %s)
             LIMIT 1
          """
    for key in instance.dim_id:
        # One value per %s placeholder (outer filter and subquery filter).
        params = (str(key), str(key))
        last_error = None

        for pk in ('id', 'order_id'):
            try:
                dwh_cursor.execute(sql.format(dwh=instance.dwh_table_name,
                                              dwh_cols=instance.dwh_args,
                                              op_cols=instance.op_args,
                                              op=instance.op_table_name,
                                              pk=pk),
                                   params)
            except mysql.connector.ProgrammingError as err:
                # Typically 1054 "Unknown column": fall through to the next
                # candidate key column instead of swallowing every error.
                last_error = err
            else:
                dwh_connector.commit()
                break
        else:
            # Every candidate key column was rejected.
            raise last_error

        billing_profile_op_id = dwh_cursor.lastrowid    # 0 if no data appended