如何使用 openpyxl 读取 python 中的合并单元格?
How to read merged cells in python using openpyxl?
我正在尝试从具有 merged_cells_range 的 excel 文件中读取数据...但输出不是我的目标。请帮帮我
import openpyxl
wb = openpyxl.load_workbook('book1.xlsx')
sheet = wb.get_sheet_by_name('info')
all_data=[]
print(sheet.merged_cells.ranges)
for row_index in range(1,sheet.max_row+1):
row=[]
for col_index in range(1,sheet.max_column+1):
vals = sheet.cell(row_index,col_index).value
if vals =='':
for crange in sheet.merged_cells.ranges:
rlo,rhi,clo,chi = crange
if rlo<=row_index and row_index<rhi and clo<=col_index and col_index<chi:
vals = sheet.cell(rlo,clo).value
print(vals)
break
row.append(vals)
all_data.append(row)
print(all_data)
for row in all_data:
sheet.append(row)
wb.save('bbbb.xlsx')
我希望获得输出:
[['06B', 'Daewoo BC 212', 80, 1373], ['06C', 'Daewoo BC 212', 80, 1020], ['06D', 'Transinco B60KL', 60, 1061], [ '06D', 'Transinco B60KL', 60, 19], ['06E', 'Daewoo BC 212', 80, 1020], ['06E', 'Daewoo BC 212', 60, 1061], ['06E ', 'Daewoo BC 212', 60, 19]] 但结果是:
[['06B', 'Daewoo BC 212', 80, 1373], ['06C', 'Daewoo BC 212', 80, 1020], ['06D', 'Transinco B60KL', 60 , 1061], [None, None, 60, 19], ['06E', 'Daewoo BC 212', 80, 1020], [None, None , 60, 1061], [None, None, 60, 19]]
my input
output desired
好了:
=^..^=
import openpyxl
from openpyxl import Workbook
# load data
raw_data = openpyxl.load_workbook('data.xlsx')
select_sheet = raw_data['Sheet1']
# collect data from rows
valid_row = []
data = []
for row in select_sheet.iter_rows(max_row=select_sheet.max_row, max_col=select_sheet.max_column):
# get cell values
row_data = [cell.value for cell in row]
# handle merged cells
new_row_data = [0]*select_sheet.max_column
if None in row_data:
new_row_data[0] = valid_row[0]
new_row_data[1] = valid_row[1]
new_row_data[2] = row_data[2]
new_row_data[3] = row_data[3]
data.append(new_row_data)
else:
data.append(row_data)
# storage valid row
if None not in row_data:
valid_row = row_data
# save data
book = Workbook()
new_sheet = book.active
for row in data:
new_sheet.append(row)
book.save('new_data.xlsx')
输入:
0 1 2 3
0 B 212 80 1.2
1 C 212 80 1.3
2 D B60 60 1.4
3 None None 60 1.5
4 E 212 80 1.6
5 None None 60 1.7
6 None None 60 1.8
输出:
0 1 2 3
0 B 212 80 1.2
1 C 212 80 1.3
2 D B60 60 1.4
3 D B60 60 1.5
4 E 212 80 1.6
5 E 212 60 1.7
6 E 212 60 1.8
我修改了我的代码,它起作用了。
import openpyxl
from openpyxl.utils import range_boundaries
wb = openpyxl.load_workbook('book1.xlsx')
sheet = wb.get_sheet_by_name('info')
all_data=[]
for row_index in range(1,sheet.max_row+1):
row=[]
for col_index in range(1,sheet.max_column+1):
vals = sheet.cell(row_index,col_index).value
if vals == None:
for crange in sheet.merged_cells:
clo,rlo,chi,rhi = crange.bounds
top_value = sheet.cell(rlo,clo).value
if rlo<=row_index and row_index<=rhi and clo<=col_index and col_index<=chi:
vals = top_value
print(vals)
break
row.append(vals)
all_data.append(row)
print(all_data)
for row in all_data:
sheet.append(row)
wb.save('bbbb.xlsx')
我正在尝试从具有 merged_cells_range 的 excel 文件中读取数据...但输出不是我的目标。请帮帮我
import openpyxl
wb = openpyxl.load_workbook('book1.xlsx')
sheet = wb.get_sheet_by_name('info')
all_data=[]
print(sheet.merged_cells.ranges)
for row_index in range(1,sheet.max_row+1):
row=[]
for col_index in range(1,sheet.max_column+1):
vals = sheet.cell(row_index,col_index).value
if vals =='':
for crange in sheet.merged_cells.ranges:
rlo,rhi,clo,chi = crange
if rlo<=row_index and row_index<rhi and clo<=col_index and col_index<chi:
vals = sheet.cell(rlo,clo).value
print(vals)
break
row.append(vals)
all_data.append(row)
print(all_data)
for row in all_data:
sheet.append(row)
wb.save('bbbb.xlsx')
我希望获得输出: [['06B', 'Daewoo BC 212', 80, 1373], ['06C', 'Daewoo BC 212', 80, 1020], ['06D', 'Transinco B60KL', 60, 1061], [ '06D', 'Transinco B60KL', 60, 19], ['06E', 'Daewoo BC 212', 80, 1020], ['06E', 'Daewoo BC 212', 60, 1061], ['06E ', 'Daewoo BC 212', 60, 19]] 但结果是:
[['06B', 'Daewoo BC 212', 80, 1373], ['06C', 'Daewoo BC 212', 80, 1020], ['06D', 'Transinco B60KL', 60 , 1061], [None, None, 60, 19], ['06E', 'Daewoo BC 212', 80, 1020], [None, None , 60, 1061], [None, None, 60, 19]]
my input output desired
好了:
=^..^=
import openpyxl
from openpyxl import Workbook
# load data
raw_data = openpyxl.load_workbook('data.xlsx')
select_sheet = raw_data['Sheet1']
# collect data from rows
valid_row = []
data = []
for row in select_sheet.iter_rows(max_row=select_sheet.max_row, max_col=select_sheet.max_column):
# get cell values
row_data = [cell.value for cell in row]
# handle merged cells
new_row_data = [0]*select_sheet.max_column
if None in row_data:
new_row_data[0] = valid_row[0]
new_row_data[1] = valid_row[1]
new_row_data[2] = row_data[2]
new_row_data[3] = row_data[3]
data.append(new_row_data)
else:
data.append(row_data)
# storage valid row
if None not in row_data:
valid_row = row_data
# save data
book = Workbook()
new_sheet = book.active
for row in data:
new_sheet.append(row)
book.save('new_data.xlsx')
输入:
0 1 2 3
0 B 212 80 1.2
1 C 212 80 1.3
2 D B60 60 1.4
3 None None 60 1.5
4 E 212 80 1.6
5 None None 60 1.7
6 None None 60 1.8
输出:
0 1 2 3
0 B 212 80 1.2
1 C 212 80 1.3
2 D B60 60 1.4
3 D B60 60 1.5
4 E 212 80 1.6
5 E 212 60 1.7
6 E 212 60 1.8
我修改了我的代码,它起作用了。
import openpyxl
from openpyxl.utils import range_boundaries
wb = openpyxl.load_workbook('book1.xlsx')
sheet = wb.get_sheet_by_name('info')
all_data=[]
for row_index in range(1,sheet.max_row+1):
row=[]
for col_index in range(1,sheet.max_column+1):
vals = sheet.cell(row_index,col_index).value
if vals == None:
for crange in sheet.merged_cells:
clo,rlo,chi,rhi = crange.bounds
top_value = sheet.cell(rlo,clo).value
if rlo<=row_index and row_index<=rhi and clo<=col_index and col_index<=chi:
vals = top_value
print(vals)
break
row.append(vals)
all_data.append(row)
print(all_data)
for row in all_data:
sheet.append(row)
wb.save('bbbb.xlsx')