python 2.7 从 excel 工作簿中提取数据的程序 - 为什么需要将数据保存在与文件相同的文件夹中?

python 2.7 program to extract data from excel workbooks - why does it need to be saved in the same folder as the files?

我有一个使用 openpyxl、os 和 tkinter 的程序,它允许人们选择 ose 文件目录,然后从 excel 文件的某些单元格中提取数据那个目录。按原样,只有 运行 如果 python 文件与要从中提取数据的文件位于同一文件夹中。

我想使程序文件可以存储在该文件夹之外,但我无法根据我的代码弄清楚为什么它需要在该文件夹内。有人可以指出代码中需要这样做的地方吗?

谢谢

#!/usr/bin/env python


import os
import openpyxl
import Tkinter as tk
from Tkinter import *
import tkFileDialog, tkMessageBox, ttk



def file_open():
    file_path = tkFileDialog.askdirectory()
    if file_path == "":
        tkMessageBox.showinfo("Error", "No Folder Selected")
    else:
        ALL_SHEETS = [f for f in os.listdir(file_path)
              if os.path.isfile(os.path.join(file_path, f))
              and f.endswith('.xlsx')]
        HEAD = 1
        ROW = 2
        START = 1
        END = 11

        OUTFILE = 'empty_book.xlsx'

        def get_row(sht, start, end, row):
            row_data = []
            for col in range(start, end):
                d = sht.cell(row=row, column=col)
                row_data.append(d.value)
            return row_data


        def get_all(files):
            data_rows = []
            for f in files:
                wb = openpyxl.load_workbook(filename=f, data_only=True)
                sheet = wb.get_sheet_by_name('Data')
                row = get_row(sheet, START, END, ROW)
                data_rows.append(row)
            return data_rows


        def get_headings(sheets):
            first = sheets[1]
            wb = openpyxl.load_workbook(filename=first)
            sheet = wb.get_sheet_by_name('Data')
            row = get_row(sheet, START, END, HEAD)
            return row


        def write_new(header, data, f):
            wb = openpyxl.Workbook()
            ws1 = wb.active
            ws1.title = 'Data'
            ws1.append(header)
            for row in data:
                ws1.append(row)
            wb.save(filename=f)

        def together():
            sheets = sorted(ALL_SHEETS)
            header = get_headings(sheets)
            data = get_all(sheets)
            write_new(header, data, OUTFILE)

        together()

        tkMessageBox.showinfo("Great Job!", "Data Extraction Successful!")


class NSC(tk.Frame):
    def __init__(self, parent):
        tk.Frame.__init__(self, parent)
        self.parent = parent
        self.parent.title("Degree Planner Data Extractor")
        l1 = tk.Label(text="Degree Planner Data Extractor", font=('Segui',         
             20))
        l1.place(x = 35, y = 20)
        nscButton = tk.Button(text=' Extract data from degree planners ',         
                    command=file_open)
        nscButton.place(x= 80, y=100)   
        quitButton = tk.Button(text=" Quit ", command=self.quit)
        quitButton.place(x=155, y=155)

def main():

    root = Tk()
    w = 400
    h = 250
    ws = root.winfo_screenwidth() # width of the screen
    hs = root.winfo_screenheight() # height of the screen
    x = (ws/2) - (w/2)
    y = (hs/2) - (h/2)
    root.geometry('%dx%d+%d+%d' % (w, h, x, y))
    root.resizable(0,0)
    app = NSC(root)
    root.mainloop() 

if __name__ == '__main__':

    main()

您已经解决了代码中的问题。 os.listdir returns 没有路径的文件名,因此 isfile 测试需要 os.path.join。您需要将该加入的名称添加到您的列表中。

    ALL_SHEETS = [os.path.join(file_path, f) for f in os.listdir(file_path)
          if os.path.isfile(os.path.join(file_path, f))
          and f.endswith('.xlsx')]

glob.glob 做几乎相同的事情,但有人将目录命名为“.xlsx”的风险很小。

    from glob import glob
    ALL_SHEETS = [f for f in glob(os.path.join(file_path, "*.xlsx"))
          if os.path.isfile(f)]]