python 2.7 从 excel 工作簿中提取数据的程序 - 为什么需要将数据保存在与文件相同的文件夹中?
python 2.7 program to extract data from excel workbooks - why does it need to be saved in the same folder as the files?
我有一个使用 openpyxl、os 和 tkinter 的程序,它允许人们选择 ose 文件目录,然后从 excel 文件的某些单元格中提取数据那个目录。按原样,只有 运行 如果 python 文件与要从中提取数据的文件位于同一文件夹中。
我想使程序文件可以存储在该文件夹之外,但我无法根据我的代码弄清楚为什么它需要在该文件夹内。有人可以指出代码中需要这样做的地方吗?
谢谢
#!/usr/bin/env python
import os
import openpyxl
import Tkinter as tk
from Tkinter import *
import tkFileDialog, tkMessageBox, ttk
def file_open():
file_path = tkFileDialog.askdirectory()
if file_path == "":
tkMessageBox.showinfo("Error", "No Folder Selected")
else:
ALL_SHEETS = [f for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
HEAD = 1
ROW = 2
START = 1
END = 11
OUTFILE = 'empty_book.xlsx'
def get_row(sht, start, end, row):
row_data = []
for col in range(start, end):
d = sht.cell(row=row, column=col)
row_data.append(d.value)
return row_data
def get_all(files):
data_rows = []
for f in files:
wb = openpyxl.load_workbook(filename=f, data_only=True)
sheet = wb.get_sheet_by_name('Data')
row = get_row(sheet, START, END, ROW)
data_rows.append(row)
return data_rows
def get_headings(sheets):
first = sheets[1]
wb = openpyxl.load_workbook(filename=first)
sheet = wb.get_sheet_by_name('Data')
row = get_row(sheet, START, END, HEAD)
return row
def write_new(header, data, f):
wb = openpyxl.Workbook()
ws1 = wb.active
ws1.title = 'Data'
ws1.append(header)
for row in data:
ws1.append(row)
wb.save(filename=f)
def together():
sheets = sorted(ALL_SHEETS)
header = get_headings(sheets)
data = get_all(sheets)
write_new(header, data, OUTFILE)
together()
tkMessageBox.showinfo("Great Job!", "Data Extraction Successful!")
class NSC(tk.Frame):
def __init__(self, parent):
tk.Frame.__init__(self, parent)
self.parent = parent
self.parent.title("Degree Planner Data Extractor")
l1 = tk.Label(text="Degree Planner Data Extractor", font=('Segui',
20))
l1.place(x = 35, y = 20)
nscButton = tk.Button(text=' Extract data from degree planners ',
command=file_open)
nscButton.place(x= 80, y=100)
quitButton = tk.Button(text=" Quit ", command=self.quit)
quitButton.place(x=155, y=155)
def main():
root = Tk()
w = 400
h = 250
ws = root.winfo_screenwidth() # width of the screen
hs = root.winfo_screenheight() # height of the screen
x = (ws/2) - (w/2)
y = (hs/2) - (h/2)
root.geometry('%dx%d+%d+%d' % (w, h, x, y))
root.resizable(0,0)
app = NSC(root)
root.mainloop()
if __name__ == '__main__':
main()
您已经解决了代码中的问题。 os.listdir
returns 没有路径的文件名,因此 isfile
测试需要 os.path.join
。您需要将该加入的名称添加到您的列表中。
ALL_SHEETS = [os.path.join(file_path, f) for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
glob.glob
做几乎相同的事情,但有人将目录命名为“.xlsx”的风险很小。
from glob import glob
ALL_SHEETS = [f for f in glob(os.path.join(file_path, "*.xlsx"))
if os.path.isfile(f)]]
我有一个使用 openpyxl、os 和 tkinter 的程序,它允许人们选择 ose 文件目录,然后从 excel 文件的某些单元格中提取数据那个目录。按原样,只有 运行 如果 python 文件与要从中提取数据的文件位于同一文件夹中。
我想使程序文件可以存储在该文件夹之外,但我无法根据我的代码弄清楚为什么它需要在该文件夹内。有人可以指出代码中需要这样做的地方吗?
谢谢
#!/usr/bin/env python
import os
import openpyxl
import Tkinter as tk
from Tkinter import *
import tkFileDialog, tkMessageBox, ttk
def file_open():
file_path = tkFileDialog.askdirectory()
if file_path == "":
tkMessageBox.showinfo("Error", "No Folder Selected")
else:
ALL_SHEETS = [f for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
HEAD = 1
ROW = 2
START = 1
END = 11
OUTFILE = 'empty_book.xlsx'
def get_row(sht, start, end, row):
row_data = []
for col in range(start, end):
d = sht.cell(row=row, column=col)
row_data.append(d.value)
return row_data
def get_all(files):
data_rows = []
for f in files:
wb = openpyxl.load_workbook(filename=f, data_only=True)
sheet = wb.get_sheet_by_name('Data')
row = get_row(sheet, START, END, ROW)
data_rows.append(row)
return data_rows
def get_headings(sheets):
first = sheets[1]
wb = openpyxl.load_workbook(filename=first)
sheet = wb.get_sheet_by_name('Data')
row = get_row(sheet, START, END, HEAD)
return row
def write_new(header, data, f):
wb = openpyxl.Workbook()
ws1 = wb.active
ws1.title = 'Data'
ws1.append(header)
for row in data:
ws1.append(row)
wb.save(filename=f)
def together():
sheets = sorted(ALL_SHEETS)
header = get_headings(sheets)
data = get_all(sheets)
write_new(header, data, OUTFILE)
together()
tkMessageBox.showinfo("Great Job!", "Data Extraction Successful!")
class NSC(tk.Frame):
def __init__(self, parent):
tk.Frame.__init__(self, parent)
self.parent = parent
self.parent.title("Degree Planner Data Extractor")
l1 = tk.Label(text="Degree Planner Data Extractor", font=('Segui',
20))
l1.place(x = 35, y = 20)
nscButton = tk.Button(text=' Extract data from degree planners ',
command=file_open)
nscButton.place(x= 80, y=100)
quitButton = tk.Button(text=" Quit ", command=self.quit)
quitButton.place(x=155, y=155)
def main():
root = Tk()
w = 400
h = 250
ws = root.winfo_screenwidth() # width of the screen
hs = root.winfo_screenheight() # height of the screen
x = (ws/2) - (w/2)
y = (hs/2) - (h/2)
root.geometry('%dx%d+%d+%d' % (w, h, x, y))
root.resizable(0,0)
app = NSC(root)
root.mainloop()
if __name__ == '__main__':
main()
您已经解决了代码中的问题。 os.listdir
returns 没有路径的文件名,因此 isfile
测试需要 os.path.join
。您需要将该加入的名称添加到您的列表中。
ALL_SHEETS = [os.path.join(file_path, f) for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
glob.glob
做几乎相同的事情,但有人将目录命名为“.xlsx”的风险很小。
from glob import glob
ALL_SHEETS = [f for f in glob(os.path.join(file_path, "*.xlsx"))
if os.path.isfile(f)]]