如何使用 OpenPyXL 遍历 Excel Table 中的所有行?

How to iterate over all rows in an Excel Table using OpenPyXL?

OpenPyXL 关于 Excel 表(Table)的文档没有说明如何遍历表中的值(参见官方文档的 “Worksheet Tables” 一节)。

这样做的有效方法是什么?

我写出了下面这个函数来实现这一点。

from typing import Any, Dict, Generator

from openpyxl import load_workbook
from openpyxl.worksheet.table import Table
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.utils import rows_from_range

TableRow = Dict[str, Any]

def iter_table_rows(ws:Worksheet, tb:Table) -> Generator[TableRow, None, None]:
    """Yield each data row of a worksheet table as a ``{header: value}`` dict.

    The first row of ``tb.ref`` is read as the header row; every following
    row is paired with those headers and yielded in order.

    Args:
        ws: Worksheet whose cells are looked up by coordinate.
        tb: Table whose ``ref`` range is walked row by row.

    Yields:
        One dictionary per row after the header row.
    """
    coordinate_rows = rows_from_range(tb.ref)

    # The first row of the range supplies the dictionary keys.
    header_names = [ws[ref].value for ref in next(coordinate_rows)]

    for coordinates in coordinate_rows:
        cell_values = [ws[ref].value for ref in coordinates]
        yield dict(zip(header_names, cell_values))


# Example usage: open the workbook and take its active worksheet.
wb = load_workbook("my_file.xlsx")
ws = wb.active

# Look the table up by name on the worksheet, then print each row
# produced by iter_table_rows.
tb = ws.tables["MyTable"]
for row in iter_table_rows(ws, tb):
    print(row)

感谢您提供的函数。我添加了一个 ws 参数,因为 ws 不是全局变量,而 get_row_values 需要用到它。

def iter_table_rows(ws:Worksheet, tb:Table) -> Generator[TableRow, None, None]:
    """Iterate over rows from a table with headers (row as dictionary).

    The first row of ``tb.ref`` is read as the header row; each later row is
    yielded as a dictionary mapping those headers to the row's cell values.

    Args:
        ws: Worksheet the cell values are read from. It is passed explicitly
            because the table itself is not used to find its worksheet.
        tb: Table whose ``ref`` range is iterated.

    Yields:
        One ``{header: value}`` dictionary per row after the header row.
    """
    def get_row_values(ws:Worksheet, row_cell_ref:tuple):
        # Resolve every cell coordinate of one row against the worksheet.
        return [ws[c].value for c in row_cell_ref]

    iter_rows = rows_from_range(tb.ref)
    # Consume the first row of the range as the header names.
    headers = get_row_values(ws, next(iter_rows))

    for row_cells in iter_rows:
        yield dict(zip(headers, get_row_values(ws, row_cells)))

目前我还没有看到其他方法。
而且 Table 对象没有“parent”(父级)属性,无法得知它属于哪个工作表。

好吧,最后我得出了一个不同的解决方案。
这是把 xlsx 表(table)中的数据提取到 json 文件中的函数代码。
我猜它更接近 openpyxl 的理念:数据存储在工作表中,table 似乎只是一种指向单元格范围的指针。在工作表中读取数据最有效的方法是使用 ws.iter_rows,并通过 min_row、max_row、min_col、max_col 指定范围。

import json
import logging

from argparse import Namespace

from openpyxl import load_workbook
from openpyxl.worksheet.cell_range import CellRange

def xlsx_Json(args: Namespace):
    """Extract table values from an excel workbook into a json file.

    The output file holds a single object whose only key is the table name
    and whose value is a list with one ``{column name: value}`` object per
    data row. Every cell value is converted with ``str()`` so that values
    such as datetimes can be serialized.

    Args:
        args (Namespace): parsed arguments
        args.workbook    : filename of the excel document
        args.worksheet   : name of the sheet to read
        args.table       : name of a table in worksheet
        args.json_output : name of the output file (json format)

    Note:
        It's assumed that table exists in worksheet, and worksheet exists in
        workbook, and file exists. A table without data rows produces an
        empty list.
    """
    # NOTE(review): read_only=True is deliberately not used; the original
    # author observed that a read-only worksheet did not seem to expose
    # tables -- confirm against the openpyxl version in use.
    wb = load_workbook(filename=args.workbook)
    ws = wb[args.worksheet]
    table = ws.tables[args.table]

    # table.ref is a range string; CellRange exposes its numeric bounds so
    # the worksheet can be read with iter_rows.
    table_range = CellRange(table.ref)
    header = table.column_names
    rows_json = []

    data_rows = ws.iter_rows(
        min_row=table_range.min_row + 1,  # +1 skips the header row
        min_col=table_range.min_col,
        max_row=table_range.max_row,
        max_col=table_range.max_col,
        values_only=True,
    )
    for nb_rows, row in enumerate(data_rows):
        if nb_rows == 0:
            # Log the type of each column once, based on the first data row.
            for h, x in zip(header, row):
                logging.info(f"{h}:type-{type(x)}:value:{x}")
        if nb_rows % 100 == 0:
            # Progress message every 100 rows.
            logging.info(f"rows:{nb_rows}")
        # Stringify every value: datetimes and similar are not directly
        # JSON serializable.
        rows_json.append(dict(zip(header, [str(x) for x in row])))

    with open(args.json_output, "w") as file_out:
        json.dump({args.table: rows_json}, file_out, indent=4)

    # Use the list length rather than the loop index: the index is unbound
    # when the table has no data rows and is one less than the row count.
    logging.info(f"{len(rows_json)} elements written in {args.json_output}")