设置 Dataframe loc 导致 SQL 字符串 - 元组错误
Setting Dataframe loc result in SQL string - Tuples Error
当我在 pd.read_sql_query 字符串中使用 df.loc 值时出现元组错误。
import pyodbc as db
import pandas as pd
#Connection String
Conn = db.connect('Driver={driver};Server=server;Database=db;Trusted_Connection=yes')
#DataFrame that stores SQL LookUp Table
dfSQLLookUp = pd.read_sql_query('Select col1,
col2 from dbo.LookUp_TableNames', Conn)
#Loop to store table in Dataframe and (eventually) output to Excel file
# varIndex is a manual row counter used as the .loc row label; the loop variable i itself is not used.
varIndex = -1
for i in dfSQLLookUp.itertuples():
varIndex += 1
# NOTE(review): the query template below contains two '{}' placeholders but .format() is given a
# single argument. str.format raises IndexError in that situation, so the failure happens while
# building the string, before any SQL is sent to the server.
# NOTE(review): dbo, the table name and the print message are not wrapped in quotes inside the SQL text.
dfResults = pd.read_sql_query('if (exists (select * from information_schema.TABLES where TABLE_SCHEMA = dbo and TABLE_NAME = {})) select * from dbo.{} else print table does not exist'.format(dfSQLLookUp.loc[varIndex, 'SQLTableNameColumn']), Conn)
当我打印 df.loc 值时:
import pyodbc as db
import pandas as pd
#Connection String
Conn = db.connect('Driver={driver};Server=server;Database=db;Trusted_Connection=yes')
#DataFrame that stores SQL LookUp Table
dfSQLLookUp = pd.read_sql_query('Select col1,
col2 from dbo.LookUp_TableNames', Conn)
# NOTE(review): the loop below reads dfSQLNames, but this snippet only assigns dfSQLLookUp;
# presumably both names refer to the same lookup DataFrame - confirm.
# varIndex is a manual row counter used as the .loc row label; the loop variable i itself is not used.
varIndex = -1
for i in dfSQLNames.itertuples():
varIndex += 1
# Print the cell at row label varIndex in column 'IFSFlatFileName'.
print (dfSQLNames.loc[varIndex, 'IFSFlatFileName'])
返回了正确的字符串值。这是 pd.read_sql_query 语法的问题吗?还是元组错误?感谢任何见解。
元组错误文本:
dfResults = pd.read_sql_query('if (exists (select * from
information_schema.TABLES where TABLE_SCHEMA = dbo and TABLE_NAME =
{})) select * from dbo.{} else print table does not
exist'.format(dfSQLLookUp.loc[varIndex, 'SQLTableNamesColumn']), Conn)
IndexError: tuple index out of range Press any key to continue . . .
正如 @MaxU 在评论中提到的,Pandas / SQLAlchemy 不支持 T-SQL,这很可能是引发元组错误的原因。为了绕过 Pandas / SQLAlchemy 的这一限制,我在 SQL 中创建了一个存储过程来填充查找表。我还必须在该存储过程中设置 NOCOUNT ON,因为在 Python 中调用该存储过程时会出现 NullType 错误。下面是最终结果,希望能对其他人有所帮助。
import pyodbc as db
import pandas as pd
# Database connection string (driver, server and database are placeholders).
Conn = db.connect('Driver={driver};Server=server;Database=db;Trusted_Connection=yes')
try:
    # Lookup rows returned by the stored procedure: NameOfCol1 is used as the
    # SQL table name and NameOfCol2 as the Excel file name.
    dfSQLLookUp = pd.read_sql_query('Exec dbo.uspDataFrameLookUp', Conn)

    # Pair the two columns directly instead of keeping a manual row counter,
    # so the loop does not depend on the DataFrame's index labels.
    for TableName, FileName in zip(dfSQLLookUp['NameOfCol1'], dfSQLLookUp['NameOfCol2']):
        # Table names cannot be bound as query parameters, so the name is
        # formatted into the statement text.
        dfResults = pd.read_sql_query('select * from dbo.{}'.format(TableName), Conn)

        # Write the table to its own workbook. The context manager saves and
        # closes the file; ExcelWriter.save() no longer exists in pandas 2.x.
        OutputDirectory = r'\path\...\PythonExtracts\{}.xlsx'.format(FileName)
        with pd.ExcelWriter(OutputDirectory, engine='xlsxwriter') as Excel:
            dfResults.to_excel(Excel, sheet_name='Sheet1')
finally:
    # Release the connection even if a query or an Excel write fails.
    Conn.close()
存储过程
ALTER procedure [dbo].[uspDataFrameLookUp]
as
begin
    -- Suppress "rows affected" messages so the caller receives only the
    -- result set.
    set nocount on

    -- Return every lookup row whose NameOfCol1 names an existing table in
    -- schema dbo, ordered by NameOfCol1.
    -- A single set-based query replaces the earlier row-by-row loop, which
    -- ran one iteration past the row count, copied names into an
    -- nvarchar(30) variable (truncating longer names), and looked rows up by
    -- ID = 1..count(*), skipping rows whenever IDs were not contiguous.
    select l.NameOfCol1, l.NameOfCol2
    from dbo.LookUp_TableNames as l
    where exists (select *
                  from information_schema.TABLES as t
                  where t.TABLE_SCHEMA = 'dbo'
                    and t.TABLE_NAME = l.NameOfCol1)
    order by l.NameOfCol1 asc
end
当我在 pd.read_sql_query 字符串中使用 df.loc 值时出现元组错误。
import pyodbc as db
import pandas as pd
#Connection String
Conn = db.connect('Driver={driver};Server=server;Database=db;Trusted_Connection=yes')
#DataFrame that stores SQL LookUp Table
dfSQLLookUp = pd.read_sql_query('Select col1,
col2 from dbo.LookUp_TableNames', Conn)
#Loop to store table in Dataframe and (eventually) output to Excel file
# varIndex is a manual row counter used as the .loc row label; the loop variable i itself is not used.
varIndex = -1
for i in dfSQLLookUp.itertuples():
varIndex += 1
# NOTE(review): the query template below contains two '{}' placeholders but .format() is given a
# single argument. str.format raises IndexError in that situation, so the failure happens while
# building the string, before any SQL is sent to the server.
# NOTE(review): dbo, the table name and the print message are not wrapped in quotes inside the SQL text.
dfResults = pd.read_sql_query('if (exists (select * from information_schema.TABLES where TABLE_SCHEMA = dbo and TABLE_NAME = {})) select * from dbo.{} else print table does not exist'.format(dfSQLLookUp.loc[varIndex, 'SQLTableNameColumn']), Conn)
当我打印 df.loc 值时:
import pyodbc as db
import pandas as pd
#Connection String
Conn = db.connect('Driver={driver};Server=server;Database=db;Trusted_Connection=yes')
#DataFrame that stores SQL LookUp Table
dfSQLLookUp = pd.read_sql_query('Select col1,
col2 from dbo.LookUp_TableNames', Conn)
# NOTE(review): the loop below reads dfSQLNames, but this snippet only assigns dfSQLLookUp;
# presumably both names refer to the same lookup DataFrame - confirm.
# varIndex is a manual row counter used as the .loc row label; the loop variable i itself is not used.
varIndex = -1
for i in dfSQLNames.itertuples():
varIndex += 1
# Print the cell at row label varIndex in column 'IFSFlatFileName'.
print (dfSQLNames.loc[varIndex, 'IFSFlatFileName'])
返回了正确的字符串值。这是 pd.read_sql_query 语法的问题吗?还是元组错误?感谢任何见解。
元组错误文本:
dfResults = pd.read_sql_query('if (exists (select * from information_schema.TABLES where TABLE_SCHEMA = dbo and TABLE_NAME = {})) select * from dbo.{} else print table does not exist'.format(dfSQLLookUp.loc[varIndex, 'SQLTableNamesColumn']), Conn) IndexError: tuple index out of range Press any key to continue . . .
正如 @MaxU 在评论中提到的,Pandas / SQLAlchemy 不支持 T-SQL,这很可能是引发元组错误的原因。为了绕过 Pandas / SQLAlchemy 的这一限制,我在 SQL 中创建了一个存储过程来填充查找表。我还必须在该存储过程中设置 NOCOUNT ON,因为在 Python 中调用该存储过程时会出现 NullType 错误。下面是最终结果,希望能对其他人有所帮助。
import pyodbc as db
import pandas as pd
# Database connection string (driver, server and database are placeholders).
Conn = db.connect('Driver={driver};Server=server;Database=db;Trusted_Connection=yes')
try:
    # Lookup rows returned by the stored procedure: NameOfCol1 is used as the
    # SQL table name and NameOfCol2 as the Excel file name.
    dfSQLLookUp = pd.read_sql_query('Exec dbo.uspDataFrameLookUp', Conn)

    # Pair the two columns directly instead of keeping a manual row counter,
    # so the loop does not depend on the DataFrame's index labels.
    for TableName, FileName in zip(dfSQLLookUp['NameOfCol1'], dfSQLLookUp['NameOfCol2']):
        # Table names cannot be bound as query parameters, so the name is
        # formatted into the statement text.
        dfResults = pd.read_sql_query('select * from dbo.{}'.format(TableName), Conn)

        # Write the table to its own workbook. The context manager saves and
        # closes the file; ExcelWriter.save() no longer exists in pandas 2.x.
        OutputDirectory = r'\path\...\PythonExtracts\{}.xlsx'.format(FileName)
        with pd.ExcelWriter(OutputDirectory, engine='xlsxwriter') as Excel:
            dfResults.to_excel(Excel, sheet_name='Sheet1')
finally:
    # Release the connection even if a query or an Excel write fails.
    Conn.close()
存储过程
ALTER procedure [dbo].[uspDataFrameLookUp]
as
begin
    -- Suppress "rows affected" messages so the caller receives only the
    -- result set.
    set nocount on

    -- Return every lookup row whose NameOfCol1 names an existing table in
    -- schema dbo, ordered by NameOfCol1.
    -- A single set-based query replaces the earlier row-by-row loop, which
    -- ran one iteration past the row count, copied names into an
    -- nvarchar(30) variable (truncating longer names), and looked rows up by
    -- ID = 1..count(*), skipping rows whenever IDs were not contiguous.
    select l.NameOfCol1, l.NameOfCol2
    from dbo.LookUp_TableNames as l
    where exists (select *
                  from information_schema.TABLES as t
                  where t.TABLE_SCHEMA = 'dbo'
                    and t.TABLE_NAME = l.NameOfCol1)
    order by l.NameOfCol1 asc
end