如何在 Python 中使用 openpyxl 库遍历多个 excel 工作表?

How to iterate through multiple Excel sheets using the openpyxl library in Python?

我正在使用 openpyxl 库读取 xlsx 文件,从中提取少量内容,并在写入 txt 输出文件时添加一些额外的字符串。我目前使用的 Excel 文件包含名为 Summary 和 Employee 的工作表,下面的代码对这个文件运行正常。现在的问题是,我要用同一份代码读取另一个 Excel 文件,该文件包含更多工作表,而我无法事先确定这些工作表的名称,因此代码行 ws = wb['Employee'] 中的工作表名称会不断变化。不过有一点可以确定:我不需要从第一个工作表(sheet1)读取任何数据,所有 xlsx 文件的数据提取都从第二个工作表(sheet2)开始。我不知道该如何着手,如能提供任何帮助,我将不胜感激。

提前感谢您的时间和努力!

代码:

from openpyxl import load_workbook

# Raw string: in an ordinary literal the leading '\t' would be read as a TAB
# character and silently corrupt the path.
data_file = r'\test.xlsx'

# Load the entire workbook.
wb = load_workbook(data_file)
ws = wb['Employee']  # Sheet name is hard-coded here

# Column names and their SQL types, collected from row 6 downwards.
mylines = {"Column_name": [], "Column_Type": []}

# Spreadsheet type label -> SQL column type. Labels that are not listed here
# are passed through unchanged.
type_strs = {
    'String': 'VARCHAR(256)',
    'Numeric': 'NUMBER',
    'Date': 'NUMBER(4,0)',
    'Int': 'NUMBER'
}

for i in range(6, ws.max_row + 1):
    name = ws.cell(row=i, column=1).value
    name1 = ws.cell(row=i, column=2).value
    if name is None:
        # A row without a field name cannot form a column definition and
        # would raise TypeError in the string concatenation below.
        continue
    mylines["Column_name"].append(name)
    # Translate the type once, as it is stored, instead of re-mapping the
    # whole list after every row.
    mylines["Column_Type"].append(type_strs.get(name1, name1))

# "<name> <type>" pairs separated by ", ", with the leading space the
# statement template expects.
theString = " " + ", ".join(
    col_name + " " + col_type
    for col_name, col_type in zip(mylines['Column_name'], mylines['Column_Type'])
)

# NOTE(review): "CREATE TABLE TRANSIENT TABLE" looks like a typo for
# "CREATE TRANSIENT TABLE" - confirm against the target database before
# changing the emitted text.
# The context manager closes the file even if the write fails.
with open('/output.txt', 'w') as outputFile:  # Text file output
    outputFile.write("CREATE TABLE TRANSIENT TABLE STG_EMPLOYEE({});".format(theString) + "\n")

根据 SO 用户评论更新代码:

from openpyxl import load_workbook

# Raw string: in an ordinary literal the leading '\t' would be read as a TAB
# character and silently corrupt the path.
data_file = r'\test.xlsx'

# Load the entire workbook.
wb = load_workbook(data_file)

# Column names and their SQL types, collected from row 6 downwards.
mylines = {"Column_name": [], "Column_Type": []}

# Spreadsheet type label -> SQL column type. Labels that are not listed here
# are passed through unchanged.
type_strs = {
    'String': 'VARCHAR(256)',
    'Numeric': 'NUMBER',
    'Date': 'NUMBER(4,0)',
    'Int': 'NUMBER'
}

# Slicing off index 0 skips the first worksheet without a hand-rolled flag;
# no sheet name has to be known in advance.
for ws in wb.worksheets[1:]:
    for i in range(6, ws.max_row + 1):
        name = ws.cell(row=i, column=1).value
        name1 = ws.cell(row=i, column=2).value
        if name is None:
            # A row without a field name cannot form a column definition and
            # would raise TypeError in the string concatenation below.
            continue
        mylines["Column_name"].append(name)
        # Translate the type once, as it is stored.
        mylines["Column_Type"].append(type_strs.get(name1, name1))

# All sheets' columns end up in one statement: "<name> <type>" pairs
# separated by ", ", with the leading space the template expects.
theString = " " + ", ".join(
    col_name + " " + col_type
    for col_name, col_type in zip(mylines['Column_name'], mylines['Column_Type'])
)

# NOTE(review): "CREATE TABLE TRANSIENT TABLE" looks like a typo for
# "CREATE TRANSIENT TABLE" - confirm against the target database before
# changing the emitted text.
# The context manager closes the file even if the write fails.
with open('/output.txt', 'w') as outputFile:  # Text file output
    outputFile.write("CREATE TABLE TRANSIENT TABLE STG_EMPLOYEE({});".format(theString) + "\n")

Excel数据

<Sheet 1 Name -> Summary Sheet: Empty
<Sheet 2 Name -> Employee Sheet 
                File Name:  Employee
                Sheet Name: Employee
                File Type:  csv
    
                Field Name  Type
                   Name     String
            Salary  Numeric
            Date    Date
            Phone       Int

<Sheet 3 Name->   Employee1 Sheet
            File Name:  Employee
            Sheet Name: Employee1
            File Type:  csv
    
            Field Name  Type
            Employee Name   Date
            Employee Salary Int
            Employment Date Int
            Office Phone    Int
    

要遍历工作簿中的所有工作表并读取其中的数据(第一个工作表除外),请删除 ws = wb['Employee'] 这一行。

改用下面的 for 循环(插入到 for i in range(6, ... 这一行之前):

# Visit every worksheet in the workbook except the first one.
skip = True
for ws in wb.worksheets:
    if skip == True:
        # First iteration only: clear the flag and read nothing from this sheet.
        skip = False
    else:
        # Read from row 6 through the sheet's last row.
        for i in range(6, ws.max_row+1):
            name = ws.cell(row=i, column=1).value
            ....

这样会读取除第一个工作表之外的每个工作表,并将数据追加到 mylines 中。

第二次更新 正如您在下面的评论中提到的,要使用新的 SQL 查询添加新行,请进行这些额外的更改

  1. 向字典添加另一个条目以标记新的一行,如下所示(注意:务必在读取完某个工作表中的所有行之后再执行这些语句)。
  2. 修改字符串的拼接方式,以便在遇到 NextLine 标记时将该字符串写入输出文件。请注意,NewFile 布尔值会使第一次写入覆盖已存在的同名文件,之后的各行则以追加方式写入。
# Collect the column definitions of every worksheet except the first one.
# Relies on `wb`, `mylines` and `type_strs` defined earlier in the script.
for ws in wb.worksheets[1:]:
    for i in range(6, ws.max_row + 1):
        name = ws.cell(row=i, column=1).value
        print(i, name)
        name1 = ws.cell(row=i, column=2).value
        print(name1)
        mylines["Column_name"].append(name)  # Appending dictionary key "Column_name"
        # Translate the type once, as it is stored, instead of re-mapping the
        # whole list after every row.
        mylines["Column_Type"].append(type_strs.get(name1, name1))
    # Sentinel entry marking the end of this sheet's columns. A field that is
    # literally named 'NextLine' would be mistaken for this marker.
    mylines["Column_name"].append('NextLine')
    mylines["Column_Type"].append('NextLine')

theString = " "
NewFile = True
# Titles are taken from wb.worksheets (not wb.sheetnames) so that tabIndex
# lines up with the loop above even if the workbook also holds chartsheets.
sheetList = [sheet.title for sheet in wb.worksheets]
tabIndex = 1  # index 0 is the skipped first worksheet

for col_name, col_type in zip(mylines['Column_name'], mylines['Column_Type']):
    if col_name != 'NextLine':
        theString += col_name + " " + col_type
        theString += ", "
    else:
        # Drop the trailing ", " left behind by the last column.
        theString = theString[:-2]
        # The first statement truncates any existing file; later ones append.
        mode = 'w' if NewFile else 'a'
        # The context manager closes the file even if the write fails.
        with open('output.txt', mode) as outputFile:  # Text file output
            if NewFile:
                print("New file  ", theString)
            else:
                print("Not new file  ", theString)
            # NOTE(review): "CREATE TABLE TRANSIENT TABLE" looks like a typo
            # for "CREATE TRANSIENT TABLE" - confirm before changing it.
            outputFile.write("CREATE TABLE TRANSIENT TABLE STG_" + sheetList[tabIndex] + "({});".format(theString) + "\n")
        NewFile = False
        tabIndex += 1
        theString = " "