对不同的表执行多条插入查询,以及 pyodbc.connection 和游标的作用范围
Executing multiple insert queries to separate tables and scope of pyodbc.connection and cursor
我公司每周都会收到一些需要上传到数据库中的平面文件。这些文件通常根据命名约定分别导入两张不同的表。文件的来源是一致的,并且在运行 Python 脚本之前已经验证过列。以下是代码目前的样子:
import glob
import pandas as pd
import numpy
import pyodbc as dbc
def uploadPerson(filename):
    """Insert every row of one Excel workbook into ``dbo.Person``.

    Opens a dedicated connection, bulk-inserts the rows with
    ``executemany``, commits, and prints a one-line summary.

    Args:
        filename: Path of the Excel file to import. Each row is bound to the
            four positional ``?`` parameters of the INSERT statement.
    """
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        df = pd.read_excel(filename)
        cursor = conn.cursor()
        try:
            # executemany expects a sequence of parameter rows.
            output = df.values.tolist()
            cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
            conn.commit()
            print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
        finally:
            # Release the cursor even when the read or the insert fails.
            cursor.close()
    finally:
        # Always release the connection; the original leaked it on any error.
        conn.close()
def uploadCustomer(filename):
    """Insert every row of one Excel workbook into ``dbo.Customer``.

    Opens a dedicated connection, bulk-inserts the rows with
    ``executemany``, commits, and prints a one-line summary.

    Args:
        filename: Path of the Excel file to import. Each row is bound to the
            six positional ``?`` parameters of the INSERT statement.
    """
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        df = pd.read_excel(filename)
        cursor = conn.cursor()
        try:
            # executemany expects a sequence of parameter rows.
            output = df.values.tolist()
            cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
            conn.commit()
            print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
        finally:
            # Release the cursor even when the read or the insert fails.
            cursor.close()
    finally:
        # Always release the connection; the original leaked it on any error.
        conn.close()
def main():
    """Import every ``*.xlsx`` file in the current directory, routed by name.

    A file whose name contains ``Person`` is passed to ``uploadPerson``; one
    whose name contains ``Customer`` is passed to ``uploadCustomer``. Any
    other file is reported on stdout and skipped.
    """
    print('Starting Program')
    for filename in glob.glob('*.xlsx'):
        # 'Person' is checked first, so a name containing both words is
        # treated as a Person file.
        if 'Person' in filename:
            uploadPerson(filename)
        elif 'Customer' in filename:
            uploadCustomer(filename)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    print('Program Finished')
我的问题是:
- 是在每个函数中各自声明数据库连接的打开和关闭更好,还是在主函数中只声明一次、在每个函数中只执行提交更好?我不确定这会对性能造成多大影响,也很想知道使用 pyodbc 的最佳实践是什么。
- 如果要执行多条不同的查询,是否最好每次都初始化并关闭游标?
- 既然除了 SQL 语句之外,这些函数的处理方式基本相同,那么用 if/else 静态定义 SQL、只保留一个上传函数会不会更好?
使用重构后的代码,这是否会更 pythonic 并且在运行时更高效?
import glob
import pandas as pd
import numpy
import pyodbc as dbc
def uploadPerson(filename,conn,cursor):
    """Insert every row of one Excel workbook into ``dbo.Person``.

    Uses the connection and cursor supplied by the caller and does not close
    either of them.

    Args:
        filename: Path of the Excel file to import. Each row is bound to the
            four positional ``?`` parameters of the INSERT statement.
        conn: Open database connection; used only to commit the insert.
        cursor: Cursor on ``conn`` used to run the bulk insert.
    """
    df = pd.read_excel(filename)
    # executemany expects a sequence of parameter rows.
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
def uploadCustomer(filename,conn,cursor):
    """Insert every row of one Excel workbook into ``dbo.Customer``.

    Uses the connection and cursor supplied by the caller and does not close
    either of them.

    Args:
        filename: Path of the Excel file to import. Each row is bound to the
            six positional ``?`` parameters of the INSERT statement.
        conn: Open database connection; used only to commit the insert.
        cursor: Cursor on ``conn`` used to run the bulk insert. (The original
            signature misspelled this as ``curosr``, so the body's reference
            to ``cursor`` did not resolve to the argument.)
    """
    df = pd.read_excel(filename)
    # executemany expects a sequence of parameter rows.
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
def main():
    """Import every ``*.xlsx`` file in the current directory over one connection.

    A single connection and cursor are opened up front and handed to
    ``uploadPerson`` / ``uploadCustomer`` for each matching file. Files whose
    name contains neither ``Person`` nor ``Customer`` are reported on stdout
    and skipped.
    """
    print('Starting Program')
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        cursor = conn.cursor()
        try:
            for filename in glob.glob('*.xlsx'):
                # 'Person' is checked first, so a name containing both words
                # is treated as a Person file.
                if 'Person' in filename:
                    uploadPerson(filename, conn, cursor)
                elif 'Customer' in filename:
                    uploadCustomer(filename, conn, cursor)
                else:
                    print('{0} cannot be imported, incorrect name'.format(filename))
        finally:
            # Close the shared cursor even if one of the uploads raised.
            cursor.close()
    finally:
        conn.close()
    print('Program Finished')
我刚开始使用 pyodbc 编程,因此如能得到任何建议,我将不胜感激!
考虑将您的方法封装在一个 class 对象中:该对象只打开一次连接并多次重复使用游标,并在对象被删除时关闭游标和连接。
import glob
import pandas as pd
import numpy as np
import pyodbc as dbc
class DataBaseAPI:
    """Upload Person/Customer workbooks over one shared database connection.

    The connection and cursor are opened once in ``__init__``, reused for
    every file, and closed in ``__del__``.
    """

    def __init__(self, xl_files):
        """Collect the files to import and open the connection.

        Args:
            xl_files: Glob pattern selecting the workbooks, e.g. ``'*.xlsx'``.
        """
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

        # INITIALIZE DB OBJECTS
        # The parentheses are required: without them only the first literal
        # is assigned and the other two are discarded as bare expressions.
        conn_str = (
            'Driver={SQL Server Native Client 11.0};'
            'Server=SERVERNAME;Database=DATABASENAME;'
            'Trusted_Connection=yes;'
        )
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

    def processFiles(self):
        """Route each collected file to the matching upload method by name."""
        for filename in self.glob_files:
            # 'Person' is checked first, so a name containing both words is
            # treated as a Person file.
            if 'Person' in filename:
                self.filename = filename
                self.uploadPerson()
            elif 'Customer' in filename:
                self.filename = filename
                self.uploadCustomer()
            else:
                print(self.failed_import_msg.format(filename))

    def _upload(self, table, sql):
        """Bulk-insert the rows of ``self.filename`` with ``sql`` and report.

        Args:
            table: Table name shown in the success message.
            sql: Parameterised INSERT statement; every workbook row supplies
                one set of parameters.
        """
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany(sql, output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, table, len(df), len(df.columns)))

    def uploadPerson(self):
        """Insert the rows of ``self.filename`` into ``dbo.Person``."""
        self._upload('Person', "INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)")

    def uploadCustomer(self):
        """Insert the rows of ``self.filename`` into ``dbo.Customer``."""
        self._upload('Customer', "INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)")

    def __del__(self):
        # CLOSE DB OBJECTS
        # getattr guards: if connecting failed in __init__ these attributes
        # were never set, and __del__ must not raise on top of that error.
        cursor = getattr(self, 'cursor', None)
        if cursor is not None:
            cursor.close()
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()
# Build the uploader for every workbook in the current directory, run the
# import, then drop the only reference to the object.
uploader = DataBaseAPI('*.xlsx')
uploader.processFiles()
del uploader
或者,使用 __enter__ 和 __exit__ 方法,在上下文管理器中运行您的 class 对象:
class DataBaseAPI:
    """Context-manager variant: connect on ``__enter__``, close on ``__exit__``."""

    def __init__(self, xl_files):
        """Collect the files to import; no database work happens here.

        Args:
            xl_files: Glob pattern selecting the workbooks, e.g. ``'*.xlsx'``.
        """
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

    def __enter__(self):
        """Open the connection and cursor and return this object."""
        # INITIALIZE DB OBJECTS
        # The parentheses are required: without them only the first literal
        # is assigned and the other two are discarded as bare expressions.
        conn_str = (
            'Driver={SQL Server Native Client 11.0};'
            'Server=SERVERNAME;Database=DATABASENAME;'
            'Trusted_Connection=yes;'
        )
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()
        # IMPORTANT TO ADD: "with ... as obj" binds whatever __enter__ returns.
        return self

    # Placeholder kept from the original snippet: the file-processing and
    # upload methods are elided here.
    ...

    def __exit__(self, exception_type, exception_val, trace):
        """Close the cursor and the connection; exceptions are not suppressed."""
        # CLOSE DB OBJECTS
        try:
            self.cursor.close()
        finally:
            # Close the connection even if closing the cursor raised.
            self.conn.close()
# The with-block opens the connection on entry and closes it on exit, whether
# or not processFiles() raises.
with DataBaseAPI('*.xlsx') as obj:
    obj.processFiles()
我公司每周都会收到一些需要上传到数据库中的平面文件。这些文件通常根据命名约定分别导入两张不同的表。文件的来源是一致的,并且在运行 Python 脚本之前已经验证过列。以下是代码目前的样子:
import glob
import pandas as pd
import numpy
import pyodbc as dbc
def uploadPerson(filename):
    """Insert every row of one Excel workbook into ``dbo.Person``.

    Opens a dedicated connection, bulk-inserts the rows with
    ``executemany``, commits, and prints a one-line summary.

    Args:
        filename: Path of the Excel file to import. Each row is bound to the
            four positional ``?`` parameters of the INSERT statement.
    """
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        df = pd.read_excel(filename)
        cursor = conn.cursor()
        try:
            # executemany expects a sequence of parameter rows.
            output = df.values.tolist()
            cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
            conn.commit()
            print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
        finally:
            # Release the cursor even when the read or the insert fails.
            cursor.close()
    finally:
        # Always release the connection; the original leaked it on any error.
        conn.close()
def uploadCustomer(filename):
    """Insert every row of one Excel workbook into ``dbo.Customer``.

    Opens a dedicated connection, bulk-inserts the rows with
    ``executemany``, commits, and prints a one-line summary.

    Args:
        filename: Path of the Excel file to import. Each row is bound to the
            six positional ``?`` parameters of the INSERT statement.
    """
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        df = pd.read_excel(filename)
        cursor = conn.cursor()
        try:
            # executemany expects a sequence of parameter rows.
            output = df.values.tolist()
            cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
            conn.commit()
            print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
        finally:
            # Release the cursor even when the read or the insert fails.
            cursor.close()
    finally:
        # Always release the connection; the original leaked it on any error.
        conn.close()
def main():
    """Import every ``*.xlsx`` file in the current directory, routed by name.

    A file whose name contains ``Person`` is passed to ``uploadPerson``; one
    whose name contains ``Customer`` is passed to ``uploadCustomer``. Any
    other file is reported on stdout and skipped.
    """
    print('Starting Program')
    for filename in glob.glob('*.xlsx'):
        # 'Person' is checked first, so a name containing both words is
        # treated as a Person file.
        if 'Person' in filename:
            uploadPerson(filename)
        elif 'Customer' in filename:
            uploadCustomer(filename)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    print('Program Finished')
我的问题是:
- 是在每个函数中各自声明数据库连接的打开和关闭更好,还是在主函数中只声明一次、在每个函数中只执行提交更好?我不确定这会对性能造成多大影响,也很想知道使用 pyodbc 的最佳实践是什么。
- 如果要执行多条不同的查询,是否最好每次都初始化并关闭游标?
- 既然除了 SQL 语句之外,这些函数的处理方式基本相同,那么用 if/else 静态定义 SQL、只保留一个上传函数会不会更好?
使用重构后的代码,这是否会更 pythonic 并且在运行时更高效?
import glob
import pandas as pd
import numpy
import pyodbc as dbc
def uploadPerson(filename,conn,cursor):
    """Insert every row of one Excel workbook into ``dbo.Person``.

    Uses the connection and cursor supplied by the caller and does not close
    either of them.

    Args:
        filename: Path of the Excel file to import. Each row is bound to the
            four positional ``?`` parameters of the INSERT statement.
        conn: Open database connection; used only to commit the insert.
        cursor: Cursor on ``conn`` used to run the bulk insert.
    """
    df = pd.read_excel(filename)
    # executemany expects a sequence of parameter rows.
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
def uploadCustomer(filename,conn,cursor):
    """Insert every row of one Excel workbook into ``dbo.Customer``.

    Uses the connection and cursor supplied by the caller and does not close
    either of them.

    Args:
        filename: Path of the Excel file to import. Each row is bound to the
            six positional ``?`` parameters of the INSERT statement.
        conn: Open database connection; used only to commit the insert.
        cursor: Cursor on ``conn`` used to run the bulk insert. (The original
            signature misspelled this as ``curosr``, so the body's reference
            to ``cursor`` did not resolve to the argument.)
    """
    df = pd.read_excel(filename)
    # executemany expects a sequence of parameter rows.
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
def main():
    """Import every ``*.xlsx`` file in the current directory over one connection.

    A single connection and cursor are opened up front and handed to
    ``uploadPerson`` / ``uploadCustomer`` for each matching file. Files whose
    name contains neither ``Person`` nor ``Customer`` are reported on stdout
    and skipped.
    """
    print('Starting Program')
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    try:
        cursor = conn.cursor()
        try:
            for filename in glob.glob('*.xlsx'):
                # 'Person' is checked first, so a name containing both words
                # is treated as a Person file.
                if 'Person' in filename:
                    uploadPerson(filename, conn, cursor)
                elif 'Customer' in filename:
                    uploadCustomer(filename, conn, cursor)
                else:
                    print('{0} cannot be imported, incorrect name'.format(filename))
        finally:
            # Close the shared cursor even if one of the uploads raised.
            cursor.close()
    finally:
        conn.close()
    print('Program Finished')
我刚开始使用 pyodbc 编程,因此如能得到任何建议,我将不胜感激!
考虑将您的方法封装在一个 class 对象中:该对象只打开一次连接并多次重复使用游标,并在对象被删除时关闭游标和连接。
import glob
import pandas as pd
import numpy as np
import pyodbc as dbc
class DataBaseAPI:
    """Upload Person/Customer workbooks over one shared database connection.

    The connection and cursor are opened once in ``__init__``, reused for
    every file, and closed in ``__del__``.
    """

    def __init__(self, xl_files):
        """Collect the files to import and open the connection.

        Args:
            xl_files: Glob pattern selecting the workbooks, e.g. ``'*.xlsx'``.
        """
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

        # INITIALIZE DB OBJECTS
        # The parentheses are required: without them only the first literal
        # is assigned and the other two are discarded as bare expressions.
        conn_str = (
            'Driver={SQL Server Native Client 11.0};'
            'Server=SERVERNAME;Database=DATABASENAME;'
            'Trusted_Connection=yes;'
        )
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

    def processFiles(self):
        """Route each collected file to the matching upload method by name."""
        for filename in self.glob_files:
            # 'Person' is checked first, so a name containing both words is
            # treated as a Person file.
            if 'Person' in filename:
                self.filename = filename
                self.uploadPerson()
            elif 'Customer' in filename:
                self.filename = filename
                self.uploadCustomer()
            else:
                print(self.failed_import_msg.format(filename))

    def _upload(self, table, sql):
        """Bulk-insert the rows of ``self.filename`` with ``sql`` and report.

        Args:
            table: Table name shown in the success message.
            sql: Parameterised INSERT statement; every workbook row supplies
                one set of parameters.
        """
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany(sql, output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, table, len(df), len(df.columns)))

    def uploadPerson(self):
        """Insert the rows of ``self.filename`` into ``dbo.Person``."""
        self._upload('Person', "INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)")

    def uploadCustomer(self):
        """Insert the rows of ``self.filename`` into ``dbo.Customer``."""
        self._upload('Customer', "INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)")

    def __del__(self):
        # CLOSE DB OBJECTS
        # getattr guards: if connecting failed in __init__ these attributes
        # were never set, and __del__ must not raise on top of that error.
        cursor = getattr(self, 'cursor', None)
        if cursor is not None:
            cursor.close()
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()
# Build the uploader for every workbook in the current directory, run the
# import, then drop the only reference to the object.
uploader = DataBaseAPI('*.xlsx')
uploader.processFiles()
del uploader
或者,使用 __enter__ 和 __exit__ 方法,在上下文管理器中运行您的 class 对象:
class DataBaseAPI:
    """Context-manager variant: connect on ``__enter__``, close on ``__exit__``."""

    def __init__(self, xl_files):
        """Collect the files to import; no database work happens here.

        Args:
            xl_files: Glob pattern selecting the workbooks, e.g. ``'*.xlsx'``.
        """
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

    def __enter__(self):
        """Open the connection and cursor and return this object."""
        # INITIALIZE DB OBJECTS
        # The parentheses are required: without them only the first literal
        # is assigned and the other two are discarded as bare expressions.
        conn_str = (
            'Driver={SQL Server Native Client 11.0};'
            'Server=SERVERNAME;Database=DATABASENAME;'
            'Trusted_Connection=yes;'
        )
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()
        # IMPORTANT TO ADD: "with ... as obj" binds whatever __enter__ returns.
        return self

    # Placeholder kept from the original snippet: the file-processing and
    # upload methods are elided here.
    ...

    def __exit__(self, exception_type, exception_val, trace):
        """Close the cursor and the connection; exceptions are not suppressed."""
        # CLOSE DB OBJECTS
        try:
            self.cursor.close()
        finally:
            # Close the connection even if closing the cursor raised.
            self.conn.close()
# The with-block opens the connection on entry and closes it on exit, whether
# or not processFiles() raises.
with DataBaseAPI('*.xlsx') as obj:
    obj.processFiles()