向不同的表执行多个插入查询,以及 pyodbc 连接(connection)和游标(cursor)的作用域

Executing multiple insert queries to separate tables and scope of pyodbc.connection and cursor

我公司每周都会收到一些需要上传到数据库中的平面文件。这些文件通常会根据命名约定分别导入两个不同的表。文件的来源是一致的,并且在运行 Python 脚本之前已经验证过各列。下面是代码目前的样子:

import glob
import pandas as pd
import numpy
import pyodbc as dbc
 
def uploadPerson(filename):
    """Insert every row of an Excel workbook into ``DATABASENAME.dbo.Person``.

    The workbook is read *before* the connection is opened, so an unreadable
    file never costs a database connection. The cursor and connection are
    closed in ``finally`` blocks, so they are released even when the insert
    or the commit raises.

    Args:
        filename: Path of the workbook to load. Each row is bound to the four
            ``?`` placeholders of the INSERT statement, in column order.

    Raises:
        Any exception raised by ``pandas.read_excel`` or by pyodbc is
        propagated to the caller after the resources have been closed.
    """
    df = pd.read_excel(filename)
    output = df.values.tolist()

    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # If the commit was never reached, the insert is not committed here.
        conn.close()

    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
 
def uploadCustomer(filename):
    """Insert every row of an Excel workbook into ``DATABASENAME.dbo.Customer``.

    The workbook is read *before* the connection is opened, so an unreadable
    file never costs a database connection. The cursor and connection are
    closed in ``finally`` blocks, so they are released even when the insert
    or the commit raises.

    Args:
        filename: Path of the workbook to load. Each row is bound to the six
            ``?`` placeholders of the INSERT statement, in column order.

    Raises:
        Any exception raised by ``pandas.read_excel`` or by pyodbc is
        propagated to the caller after the resources have been closed.
    """
    df = pd.read_excel(filename)
    output = df.values.tolist()

    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # If the commit was never reached, the insert is not committed here.
        conn.close()

    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
 
def main():
    """Upload every ``*.xlsx`` file in the current directory.

    A file whose name contains ``Person`` goes to ``uploadPerson``; otherwise
    a name containing ``Customer`` goes to ``uploadCustomer``. Any other file
    is reported on stdout and skipped.
    """
    print('Starting Program')

    for workbook in glob.glob('*.xlsx'):
        # 'Person' is tested first, so it wins when a name contains both words.
        if 'Person' in workbook:
            uploadPerson(workbook)
            continue

        if 'Customer' in workbook:
            uploadCustomer(workbook)
            continue

        print('{0} cannot be imported, incorrect name'.format(workbook))

    print('Program Finished')

我的问题是:

如果改用下面重构后的代码,是否会更 Pythonic,并且运行时更高效?

import glob
import pandas as pd
import numpy
import pyodbc as dbc
 
def uploadPerson(filename,conn,cursor):
    """Load an Excel workbook and insert its rows into ``DATABASENAME.dbo.Person``.

    Args:
        filename: Path of the workbook to read with ``pandas.read_excel``.
        conn: Open connection; ``commit()`` is called on it after the insert.
        cursor: Cursor used to run the parameterised INSERT via
            ``executemany``.
    """
    frame = pd.read_excel(filename)
    rows = frame.values.tolist()

    insert_sql = "INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)"
    cursor.executemany(insert_sql, rows)
    conn.commit()

    # DataFrame.shape is (row count, column count).
    n_rows, n_cols = frame.shape
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, n_rows, n_cols))
 
def uploadCustomer(filename,conn,cursor):
    """Load an Excel workbook and insert its rows into ``DATABASENAME.dbo.Customer``.

    The third parameter was previously misspelled ``curosr``, so the body's
    reference to ``cursor`` was not bound to the argument and raised
    ``NameError``. The visible caller passes it positionally and is
    unaffected by the rename.

    Args:
        filename: Path of the workbook to read with ``pandas.read_excel``.
        conn: Open connection; ``commit()`` is called on it after the insert.
        cursor: Cursor used to run the parameterised INSERT via
            ``executemany``.
    """
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))

def main():
    """Upload every ``*.xlsx`` file in the current directory over one connection.

    A single connection and cursor are opened once and passed to
    ``uploadPerson`` / ``uploadCustomer`` for each matching file. Both are
    closed in ``finally`` blocks, so they are released even when an upload
    raises part-way through the batch.

    Raises:
        Any exception raised while connecting or uploading is propagated
        after the cursor and connection have been closed.
    """
    print('Starting Program')
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        cursor = conn.cursor()
        try:
            for filename in glob.glob('*.xlsx'):
                # 'Person' is tested first, so it wins when a name contains both words.
                if 'Person' in filename:
                    uploadPerson(filename, conn, cursor)
                elif 'Customer' in filename:
                    uploadCustomer(filename, conn, cursor)
                else:
                    print('{0} cannot be imported, incorrect name'.format(filename))
        finally:
            cursor.close()
    finally:
        conn.close()
    print('Program Finished')

我刚开始使用 pyodbc 编程,因此任何建议我都将不胜感激!

考虑将您的方法封装在一个 class 对象中,该对象打开一次连接并多次重复使用游标,并在删除对象时关闭游标和连接。

import glob
import pandas as pd
import numpy as np
import pyodbc as dbc

class DataBaseAPI:
    """Upload Excel workbooks into the Person and Customer tables.

    One connection and one cursor are opened when the object is built and
    reused for every file. They are closed in ``__del__``, i.e. when the
    object is garbage-collected.

    Fixes relative to the original snippet:
      * ``xl_files`` is a constructor argument, not a base class;
      * the multi-line connection string is wrapped in parentheses;
      * ``processFiles`` takes ``self``;
      * the success message uses ``self.filename`` instead of an undefined
        ``filename``;
      * ``__del__`` tolerates an instance whose connection was never opened.
    """

    def __init__(self, xl_files):
        """Collect the files matching *xl_files* and open the DB objects.

        Args:
            xl_files: Glob pattern selecting the workbooks, e.g. ``'*.xlsx'``.
        """
        # Set first so __del__ is safe even if connecting below fails.
        self.conn = None
        self.cursor = None
        self.filename = None

        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

        # INITIALIZE DB OBJECTS
        conn_str = ('Driver={SQL Server Native Client 11.0};'
                    'Server=SERVERNAME;Database=DATABASENAME;'
                    'Trusted_Connection=yes;')
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

    def processFiles(self):
        """Route each collected file to the upload method matching its name.

        ``Person`` is tested before ``Customer``. A file matching neither is
        reported on stdout and skipped.
        """
        for filename in self.glob_files:
            if 'Person' in filename:
                self.filename = filename
                self.uploadPerson()
            elif 'Customer' in filename:
                self.filename = filename
                self.uploadCustomer()
            else:
                print(self.failed_import_msg.format(filename))

    def _upload(self, table, insert_sql):
        """Insert the rows of ``self.filename`` with *insert_sql*, then commit."""
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany(insert_sql, output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, table, len(df), len(df.columns)))

    def uploadPerson(self):
        """Insert the rows of ``self.filename`` into ``DATABASENAME.dbo.Person``."""
        self._upload('Person', "INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)")

    def uploadCustomer(self):
        """Insert the rows of ``self.filename`` into ``DATABASENAME.dbo.Customer``."""
        self._upload('Customer', "INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)")

    def __del__(self):
        """Close the cursor, then the connection, if they were opened."""
        # CLOSE DB OBJECTS
        # getattr guards against instances on which __init__ never ran.
        cursor = getattr(self, 'cursor', None)
        conn = getattr(self, 'conn', None)
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if conn is not None:
                conn.close()
                    
        
# Build the uploader for the files matching '*.xlsx' (this also opens the
# connection and cursor), process them, then drop the only reference so that
# DataBaseAPI.__del__ can close the DB objects when the instance is collected.
obj = DataBaseAPI('*.xlsx') 
obj.processFiles()
del obj

或者,为您的类实现 `__enter__` 和 `__exit__` 方法,然后在上下文管理器(`with` 语句)中使用该类的对象:

class DataBaseAPI:
    """Context-manager variant of the uploader.

    The connection and cursor are opened in ``__enter__`` and closed in
    ``__exit__``, so they are released as soon as the ``with`` block ends,
    whether or not it raised.

    Fixes relative to the original snippet:
      * ``xl_files`` is a constructor argument, not a base class;
      * the multi-line connection string is wrapped in parentheses;
      * ``__exit__`` is indented consistently with the other methods;
      * the connection is closed even if closing the cursor raises.
    """

    def __init__(self, xl_files):
        """Collect the files matching *xl_files*; no DB access happens here.

        Args:
            xl_files: Glob pattern selecting the workbooks, e.g. ``'*.xlsx'``.
        """
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'
        self.conn = None
        self.cursor = None

    def __enter__(self):
        """Open the connection and cursor and return this object."""
        # INITIALIZE DB OBJECTS
        conn_str = ('Driver={SQL Server Native Client 11.0};'
                    'Server=SERVERNAME;Database=DATABASENAME;'
                    'Trusted_Connection=yes;')
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

        return self    # IMPORTANT TO ADD

    # The original snippet elides the remaining methods (processFiles,
    # uploadPerson, uploadCustomer) at this point.
    ...

    def __exit__(self, exception_type, exception_val, trace):
        """Close the cursor and the connection.

        Returns ``None``, so an exception raised inside the ``with`` block
        is not suppressed.
        """
        # CLOSE DB OBJECTS
        try:
            self.cursor.close()
        finally:
            self.conn.close()


# __enter__ opens the connection and cursor; __exit__ closes both when the
# block is left.
with DataBaseAPI('*.xlsx') as obj:
    obj.processFiles()