如何使用 openpyxl / pandas 或任何 python 将我从几张 excel 工作表中提取的字符串数据保存到新工作簿?
How do I save the string data I have extracted from several excel sheets to a new workbook using openpyxl / pandas or anything python?
Stack overflow 社区的第二个问题 - 我还不太擅长这个....
我正在尝试编写一些代码
- 打开一系列 excel 文档并找到 sheet 'Moderated'
- 从多个单元格中提取值
- 将数据重新排列到新的 excel sheet 中,每个单独的电子表格对应新表中的一行单元格
我认为我已经实现了上面列表中的 1 和 2 - 尽管值以字符串形式返回并且这似乎导致保存到 excel 时出现问题。草率的导入代码部分反映了我到目前为止探索的选项...
`import sys
import os
import openpyxl
import pandas as pd
import numpy as np
import glob
from openpyxl.workbook import workbook
from openpyxl import load_workbook
# NOTE(review): the leading indentation of this snippet was lost when it was
# pasted; the loop bodies below must be re-indented before the code can run.
path=r'C:\Users\longr\Desktop\pfile\sandbox' # folder that holds the input workbooks
filenames = glob.glob(path + "/*.xlsx")# every .xlsx file directly inside `path`
# Each matching workbook is opened in turn and its 'Moderated' sheet is read.
for file in filenames:
wb1 = load_workbook(file, data_only=True)# open the workbook
ws1=wb1['Moderated']# select the sheet named 'Moderated'
# Rows 3-7 of column 5 only.
for row in ws1.iter_rows(min_row=3,max_row=7,min_col=5,max_col=5):
for cell in row:
# `a` is rebound on every cell, so earlier values are not kept anywhere.
a=(cell.value)
print (a) # echo the value just read
# Rows 3-7 of column 7 only.
for row in ws1.iter_rows(min_row=3,max_row=7,min_col=7,max_col=7):
for cell in row:
# `b` is rebound on every cell, same as `a` above.
b=(cell.value)
print (b)
print(type(a))
# NOTE(review): `df` is never assigned in this snippet, and nothing here saves
# or closes `writer` after it is created.
writer = pd.ExcelWriter(r'C:\users\longr\Desktop\pfile\sandbox\Out\Out.xlsx', engine='openpyxl')
df.to_excel(writer, index=True)`
到目前为止的输出...
Sheet 1 文本 1 (e2)
Sheet 1 文本 2 (e4)
Sheet 1 文本 3 (e5)
None
Sheet 1 文本 4 (e7)
Sheet 1 文本 5 (g3)
Sheet 1 文本 6 (g4)
Sheet 1 文本 7 (g5)
Sheet 1 文本 8 (g6)
Sheet 1 文本 9 (g7)
sheet 2 文本 1 (e2)
sheet 2 文本 2 (e4)
sheet 2 文本 3 (e5)
None
sheet 2 文本 4 (e7)
sheet 2 文本 5 (g3)
sheet 2 文本 6 (g4)
sheet 2 文本 7 (g5)
Sheet 2 文本 8 (g6)
sheet 2 文本 9 (g7)
我最终想要的是..
任何帮助将不胜感激 - 但尤其是针对新手程序员的帮助
感谢 JONAS 建议使用下面的代码 - 输出现在看起来像这样
结果是 5 列,而不是我想要的 9 列。我还想使用不同的标题:H1 / HA / Header A 只是占位符,这一点我第一次提问时没有说清楚
Jonas - 你的代码比我的代码好得多[更优雅!]
使用建议的代码
#LATEST
import sys
import os
import openpyxl
import pandas as pd
import numpy as np
import glob
from openpyxl.workbook import workbook
from openpyxl import load_workbook
from openpyxl import writer
# NOTE(review): the leading indentation of this snippet was lost when it was
# pasted; the loop bodies below must be re-indented before the code can run.
path=r'C:\Users\longr\Desktop\pfile\sandbox' # folder that holds the input workbooks
filenames = glob.glob(path + "/*.xlsx")# every .xlsx file directly inside `path`
new_df=[] # plain list that collects one inner list of values per workbook
# It is turned into a DataFrame only after the loop over the files.
for file in filenames:
wb1 = load_workbook(file, data_only=True)# open the workbook
ws1=wb1['Moderated']# select the sheet named 'Moderated'
a = [] # values read from column 5
b = [] # values read from column 7
for row in ws1.iter_rows(min_row=3,max_row=7,min_col=5,max_col=7): # rows 3-7, columns 5-7 in a single pass
for i, cell in enumerate(row):
if i == 0: a.append(cell.value) # first cell of the row is column 5
if i == 2: b.append(cell.value) # third cell of the row is column 7
new_df.append(a+b) # column-5 values followed by column-7 values, appended for each excel-file
import string
alphabet = string.ascii_uppercase[:27] # ascii_uppercase has 26 letters, so this slice is the whole alphabet
df = pd.DataFrame(new_df, columns = ['header ' + alphabet[i] for i in range(len(new_df[0]))]) # one 'header <letter>' column per value in the first row
# NOTE(review): `wb` is not assigned anywhere in this snippet, and `df` is
# never written to `writer`.
writer = pd.ExcelWriter(r'C:\users\longr\Desktop\pfile\sandbox\out\out.xlsx', engine='openpyxl')
wb.save(r'C:\users\longr\Desktop\pfile\sandbox\out\out.xlsx')
print('Spreadsheet saved')
错误信息:NameError: name 'wb' is not defined(名称 'wb' 未定义)
所以您可以尝试先将每个 excel 文件的单元格值保存到一个 list 中,再把这个 list 追加到一个总的 list 中,后者将成为您的新 DataFrame:
# Collect rows 3-7 of columns 5 and 7 from the 'Moderated' sheet of every
# workbook in `filenames`, then write one output row per workbook.
# Relies on `filenames`, `load_workbook` and `pd` from the question's script.
import string

new_df = []  # one inner list per workbook; each becomes one DataFrame row
for file in filenames:
    wb1 = load_workbook(file, data_only=True)
    ws1 = wb1['Moderated']
    a = []  # values from column 5
    b = []  # values from column 7
    # Columns 5..7 are read in a single pass, so every row holds three cells:
    # index 0 is column 5 and index 2 is column 7 (column 6 is ignored).
    for row in ws1.iter_rows(min_row=3, max_row=7, min_col=5, max_col=7):
        a.append(row[0].value)
        b.append(row[2].value)
    new_df.append(a + b)  # column-5 values first, then column-7 values

# Without this guard an empty `filenames` fails below with a bare IndexError
# on new_df[0].
if not new_df:
    raise SystemExit('No workbooks were processed; nothing to write.')

# Column names 'header A', 'header B', ... - one per value in a row.
letters = string.ascii_uppercase[:len(new_df[0])]
df = pd.DataFrame(new_df, columns=['header ' + letter for letter in letters])

# The path must be a raw string: in a normal literal the '\u' of '\users'
# starts a unicode escape and the file does not even compile on Python 3.
# The `with` block saves and closes the workbook on exit.
with pd.ExcelWriter(r'C:\users\longr\Desktop\pfile\sandbox\Out\Out.xlsx') as writer:
    df.to_excel(writer)
Stack overflow 社区的第二个问题 - 我还不太擅长这个....
我正在尝试编写一些代码
- 打开一系列 excel 文档并找到 sheet 'Moderated'
- 从多个单元格中提取值
- 将数据重新排列到新的 excel sheet 中,每个单独的电子表格对应新表中的一行单元格
我认为我已经实现了上面列表中的 1 和 2 - 尽管值以字符串形式返回并且这似乎导致保存到 excel 时出现问题。草率的导入代码部分反映了我到目前为止探索的选项...
`import sys
import os
import openpyxl
import pandas as pd
import numpy as np
import glob
from openpyxl.workbook import workbook
from openpyxl import load_workbook
# NOTE(review): the leading indentation of this snippet was lost when it was
# pasted; the loop bodies below must be re-indented before the code can run.
path=r'C:\Users\longr\Desktop\pfile\sandbox' # folder that holds the input workbooks
filenames = glob.glob(path + "/*.xlsx")# every .xlsx file directly inside `path`
# Each matching workbook is opened in turn and its 'Moderated' sheet is read.
for file in filenames:
wb1 = load_workbook(file, data_only=True)# open the workbook
ws1=wb1['Moderated']# select the sheet named 'Moderated'
# Rows 3-7 of column 5 only.
for row in ws1.iter_rows(min_row=3,max_row=7,min_col=5,max_col=5):
for cell in row:
# `a` is rebound on every cell, so earlier values are not kept anywhere.
a=(cell.value)
print (a) # echo the value just read
# Rows 3-7 of column 7 only.
for row in ws1.iter_rows(min_row=3,max_row=7,min_col=7,max_col=7):
for cell in row:
# `b` is rebound on every cell, same as `a` above.
b=(cell.value)
print (b)
print(type(a))
# NOTE(review): `df` is never assigned in this snippet, and nothing here saves
# or closes `writer` after it is created.
writer = pd.ExcelWriter(r'C:\users\longr\Desktop\pfile\sandbox\Out\Out.xlsx', engine='openpyxl')
df.to_excel(writer, index=True)`
到目前为止的输出...
Sheet 1 文本 1 (e2)
Sheet 1 文本 2 (e4)
Sheet 1 文本 3 (e5)
None
Sheet 1 文本 4 (e7)
Sheet 1 文本 5 (g3)
Sheet 1 文本 6 (g4)
Sheet 1 文本 7 (g5)
Sheet 1 文本 8 (g6)
Sheet 1 文本 9 (g7)
sheet 2 文本 1 (e2)
sheet 2 文本 2 (e4)
sheet 2 文本 3 (e5)
None
sheet 2 文本 4 (e7)
sheet 2 文本 5 (g3)
sheet 2 文本 6 (g4)
sheet 2 文本 7 (g5)
Sheet 2 文本 8 (g6)
sheet 2 文本 9 (g7)
我最终想要的是..
任何帮助将不胜感激 - 但尤其是针对新手程序员的帮助
感谢 JONAS 建议使用下面的代码 - 输出现在看起来像这样
Jonas - 你的代码比我的代码好得多[更优雅!]
使用建议的代码
#LATEST
import sys
import os
import openpyxl
import pandas as pd
import numpy as np
import glob
from openpyxl.workbook import workbook
from openpyxl import load_workbook
from openpyxl import writer
# NOTE(review): the leading indentation of this snippet was lost when it was
# pasted; the loop bodies below must be re-indented before the code can run.
path=r'C:\Users\longr\Desktop\pfile\sandbox' # folder that holds the input workbooks
filenames = glob.glob(path + "/*.xlsx")# every .xlsx file directly inside `path`
new_df=[] # plain list that collects one inner list of values per workbook
# It is turned into a DataFrame only after the loop over the files.
for file in filenames:
wb1 = load_workbook(file, data_only=True)# open the workbook
ws1=wb1['Moderated']# select the sheet named 'Moderated'
a = [] # values read from column 5
b = [] # values read from column 7
for row in ws1.iter_rows(min_row=3,max_row=7,min_col=5,max_col=7): # rows 3-7, columns 5-7 in a single pass
for i, cell in enumerate(row):
if i == 0: a.append(cell.value) # first cell of the row is column 5
if i == 2: b.append(cell.value) # third cell of the row is column 7
new_df.append(a+b) # column-5 values followed by column-7 values, appended for each excel-file
import string
alphabet = string.ascii_uppercase[:27] # ascii_uppercase has 26 letters, so this slice is the whole alphabet
df = pd.DataFrame(new_df, columns = ['header ' + alphabet[i] for i in range(len(new_df[0]))]) # one 'header <letter>' column per value in the first row
# NOTE(review): `wb` is not assigned anywhere in this snippet, and `df` is
# never written to `writer`.
writer = pd.ExcelWriter(r'C:\users\longr\Desktop\pfile\sandbox\out\out.xlsx', engine='openpyxl')
wb.save(r'C:\users\longr\Desktop\pfile\sandbox\out\out.xlsx')
print('Spreadsheet saved')
错误信息:NameError: name 'wb' is not defined(名称 'wb' 未定义)
所以您可以尝试先将每个 excel 文件的单元格值保存到一个 list 中,再把这个 list 追加到一个总的 list 中,后者将成为您的新 DataFrame:
# Collect rows 3-7 of columns 5 and 7 from the 'Moderated' sheet of every
# workbook in `filenames`, then write one output row per workbook.
# Relies on `filenames`, `load_workbook` and `pd` from the question's script.
import string

new_df = []  # one inner list per workbook; each becomes one DataFrame row
for file in filenames:
    wb1 = load_workbook(file, data_only=True)
    ws1 = wb1['Moderated']
    a = []  # values from column 5
    b = []  # values from column 7
    # Columns 5..7 are read in a single pass, so every row holds three cells:
    # index 0 is column 5 and index 2 is column 7 (column 6 is ignored).
    for row in ws1.iter_rows(min_row=3, max_row=7, min_col=5, max_col=7):
        a.append(row[0].value)
        b.append(row[2].value)
    new_df.append(a + b)  # column-5 values first, then column-7 values

# Without this guard an empty `filenames` fails below with a bare IndexError
# on new_df[0].
if not new_df:
    raise SystemExit('No workbooks were processed; nothing to write.')

# Column names 'header A', 'header B', ... - one per value in a row.
letters = string.ascii_uppercase[:len(new_df[0])]
df = pd.DataFrame(new_df, columns=['header ' + letter for letter in letters])

# The path must be a raw string: in a normal literal the '\u' of '\users'
# starts a unicode escape and the file does not even compile on Python 3.
# The `with` block saves and closes the workbook on exit.
with pd.ExcelWriter(r'C:\users\longr\Desktop\pfile\sandbox\Out\Out.xlsx') as writer:
    df.to_excel(writer)