如何使用 Python 复制 excel 文件中的每一行?
How can I duplicate every row in an excel file using Python?
我有一个 excel 文件,其中包含这样的行:
col1 | col2 | col3 | col4 | col5
anotherCol1 | anotherCol2 | anotherCol3 | anotherCol4 | anotherCol5
我需要复制每一行,使其看起来像这样:
col1 | col2 | col3 | col4 | col5
col1 | col2 | col3 | col4 | col5
anotherCol1 | anotherCol2 | anotherCol3 | anotherCol4 | anotherCol5
anotherCol1 | anotherCol2 | anotherCol3 | anotherCol4 | anotherCol5
这是我目前所拥有的,excel 文件是一个 .XLS 文件而不是 .XLSX 文件,所以我不能使用 openpyxl
除非有办法解决这个问题。
这是我目前拥有的:
def DuplicateEachRow(self):
    """Read every sheet of the workbook at ``new_loc`` and queue it for rewriting.

    Each sheet is read without a header row, handed to an ``xlwt``-backed
    ``ExcelWriter`` targeting ``new_loc`` under its original sheet name, and
    printed.  No row duplication happens yet.

    ``new_loc`` is not defined in this snippet; it must be provided by the
    enclosing scope.  It is joined onto the current working directory.
    """
    import pandas as pd
    import pathlib

    # Build the path with pathlib rather than a hand-written '\' separator:
    # a lone backslash before the closing quote escapes the quote and is a
    # syntax error.
    full_path = pathlib.Path().absolute() / new_loc
    # sheet_name=None returns a dict {sheet name: DataFrame}, one per sheet.
    df = pd.read_excel(full_path, header=None, sheet_name=None)
    # engine can be openpyxl if we need .xlsx ext
    writer = pd.ExcelWriter(new_loc, engine='xlwt')
    for key in df:
        sheet = df[key]
        # sheet_name is passed by keyword; only excel_writer is positional.
        sheet.to_excel(writer, sheet_name=key, index=False, header=False)
        print(sheet)
    # NOTE: the writer is never saved or closed here, so nothing is written
    # to disk until the call below is enabled.
    # writer.save()
如何使用 'sheet' 即 dataframe
来复制每一行?
编辑:我也试过了...
def DuplicateEachRow(self):
    """Append a copy of one sheet's rows to itself and save it to a new file.

    Reads the sheet ``'GTL | GWL Disclosures'`` (no header row) from the
    workbook at ``new_loc``, prints it, concatenates the frame with itself so
    the copied rows follow the originals, and writes the result to
    ``'path_to_file.xlsx'``.

    ``new_loc`` is not defined in this snippet; it must be provided by the
    enclosing scope.  It is joined onto the current working directory.
    """
    import pandas as pd
    import pathlib

    # Build the path with pathlib rather than a hand-written '\' separator:
    # a lone backslash before the closing quote escapes the quote and is a
    # syntax error.
    full_path = pathlib.Path().absolute() / new_loc
    df = pd.read_excel(full_path, header=None, sheet_name='GTL | GWL Disclosures')
    print(df)
    # duplicate the rows: ignore_index=True renumbers the result 0..2n-1
    dup_df = pd.concat([df, df], ignore_index=True)
    # using openpyxl
    # Only this single frame is written; the other sheets of the source
    # workbook are not carried over.
    with pd.ExcelWriter('path_to_file.xlsx') as writer:
        dup_df.to_excel(writer)
但这只会写回一个 sheet 而不是原始工作簿
复制数据帧中行的一种方法是将数据帧与其自身连接:
pd.concat([df, df])
如果您想重置索引
pd.concat([df, df], ignore_index=True)
以你的例子为例:
def DuplicateEachRow(self):
    """Write a copy of the workbook at ``new_loc`` with each sheet's rows doubled.

    Every sheet is read without a header row, concatenated with itself (so
    the copied rows follow the originals) and written to
    ``'path_to_file.xlsx'`` under its original sheet name.

    ``new_loc`` is not defined in this snippet; it must be provided by the
    enclosing scope.  It is joined onto the current working directory.
    """
    import pandas as pd
    import pathlib

    # Build the path with pathlib rather than a hand-written '\' separator:
    # a lone backslash before the closing quote escapes the quote and is a
    # syntax error.
    full_path = pathlib.Path().absolute() / new_loc
    # sheet_name=None returns a dict {sheet name: DataFrame}, not a single
    # DataFrame, so pd.concat([df, df]) cannot be applied to the result
    # directly; each sheet is duplicated on its own instead.
    sheets = pd.read_excel(full_path, header=None, sheet_name=None)
    # using openpyxl
    with pd.ExcelWriter('path_to_file.xlsx') as writer:
        for name, df in sheets.items():
            # duplicate the rows:
            dup_df = pd.concat([df, df], ignore_index=True)
            dup_df.to_excel(writer, sheet_name=name)
编辑:新版本在原始行之后插入每个重复的行并将新的sheet附加到同一文件
def DuplicateEachRow():
    """Add a sheet to ``new_loc`` in which every row appears twice in a row.

    Reads the sheet ``'GTL | GWL Disclosures'`` (no header row), prints it,
    doubles the rows so each copy sits directly after its original, and
    appends the result to the same workbook as a new sheet named
    ``'Sheet3'``.

    ``new_loc`` is not defined in this snippet; it must be provided by the
    enclosing scope.  It is joined onto the current working directory.
    """
    import pandas as pd
    import pathlib

    # Build the path with pathlib rather than a hand-written '\' separator:
    # a lone backslash before the closing quote escapes the quote and is a
    # syntax error.
    full_path = pathlib.Path().absolute() / new_loc
    df = pd.read_excel(full_path, header=None, sheet_name='GTL | GWL Disclosures')
    print(df)
    # duplicate the rows:
    # keep the index, so you can sort the rows after
    dup_df = pd.concat([df, df])
    # sort the rows by the index so you have the duplicate one just after the
    # initial one (rows sharing an index label are identical copies, so their
    # relative order does not matter)
    dup_df.sort_index(inplace=True)
    # using openpyxl
    # open the file in append mode
    # NOTE(review): append mode appears to need the openpyxl engine, i.e. an
    # .xlsx workbook -- confirm before using this on an .xls file.
    with pd.ExcelWriter(new_loc, mode='a') as writer:
        # use a new name for the new sheet
        # don't save the header (dataframe columns names) and index
        # (dataframe row names) in the new sheet
        dup_df.to_excel(writer, sheet_name='Sheet3', header=False, index=False)
编辑:
此选项将根据索引号打开工作簿,读入数据,将每一行加倍,然后把加倍后的行重新写入您指定的 sheet;如果您无法使用 openpyxl,这样就可以避免使用它。至于找不到 iloc
的问题,我不确定为什么会这样,因为它是 pandas 包的一个组成部分。也许可以尝试把 pandas 重新安装到最新版本;在我这里它是可以正常工作的。
from xlrd import open_workbook
from xlutils.copy import copy
import pandas as pd
# Read the sheet to duplicate; header=None keeps the first row as data.
df = pd.read_excel('yourxls.xls', sheet_name='your_sheet', header=None)
# Repeat every index label twice so each row is immediately followed by its
# copy.  This replaces a row-by-row DataFrame.append loop, which copies the
# whole frame on every call and no longer exists in pandas 2.0+.
df_new = df.loc[df.index.repeat(2)]

# Copy the existing workbook into a writable one so the other sheets are kept.
rb = open_workbook("yourxls.xls")
wb = copy(rb)
s = wb.get_sheet(1)  # get sheet by index number
# NOTE(review): index 1 is the second sheet -- confirm it is the same sheet
# as 'your_sheet' read above.

# tolist() turns NumPy scalars into plain Python values before they are
# handed to the sheet writer.
df_vals = df_new.values.tolist()
for i, row_vals in enumerate(df_vals):
    for j, cell in enumerate(row_vals):
        s.write(i, j, cell)
wb.save('yourxls.xls')
我确实需要先 pip install 'xlrd' 和 'xlwt',才能使用 .xls 格式(上面的代码还导入了 'xlutils')。
最好的方法是直接使用 openpyxl 执行此操作:从 sheet 的底部开始工作,插入一行并从其上方的行复制值。
我有一个 excel 文件,其中包含这样的行:
col1 | col2 | col3 | col4 | col5
anotherCol1 | anotherCol2 | anotherCol3 | anotherCol4 | anotherCol5
我需要复制每一行,使其看起来像这样:
col1 | col2 | col3 | col4 | col5
col1 | col2 | col3 | col4 | col5
anotherCol1 | anotherCol2 | anotherCol3 | anotherCol4 | anotherCol5
anotherCol1 | anotherCol2 | anotherCol3 | anotherCol4 | anotherCol5
这是我目前所拥有的,excel 文件是一个 .XLS 文件而不是 .XLSX 文件,所以我不能使用 openpyxl
除非有办法解决这个问题。
这是我目前拥有的:
def DuplicateEachRow(self):
    """Read every sheet of the workbook at ``new_loc`` and queue it for rewriting.

    Each sheet is read without a header row, handed to an ``xlwt``-backed
    ``ExcelWriter`` targeting ``new_loc`` under its original sheet name, and
    printed.  No row duplication happens yet.

    ``new_loc`` is not defined in this snippet; it must be provided by the
    enclosing scope.  It is joined onto the current working directory.
    """
    import pandas as pd
    import pathlib

    # Build the path with pathlib rather than a hand-written '\' separator:
    # a lone backslash before the closing quote escapes the quote and is a
    # syntax error.
    full_path = pathlib.Path().absolute() / new_loc
    # sheet_name=None returns a dict {sheet name: DataFrame}, one per sheet.
    df = pd.read_excel(full_path, header=None, sheet_name=None)
    # engine can be openpyxl if we need .xlsx ext
    writer = pd.ExcelWriter(new_loc, engine='xlwt')
    for key in df:
        sheet = df[key]
        # sheet_name is passed by keyword; only excel_writer is positional.
        sheet.to_excel(writer, sheet_name=key, index=False, header=False)
        print(sheet)
    # NOTE: the writer is never saved or closed here, so nothing is written
    # to disk until the call below is enabled.
    # writer.save()
如何使用 'sheet' 即 dataframe
来复制每一行?
编辑:我也试过了...
def DuplicateEachRow(self):
    """Append a copy of one sheet's rows to itself and save it to a new file.

    Reads the sheet ``'GTL | GWL Disclosures'`` (no header row) from the
    workbook at ``new_loc``, prints it, concatenates the frame with itself so
    the copied rows follow the originals, and writes the result to
    ``'path_to_file.xlsx'``.

    ``new_loc`` is not defined in this snippet; it must be provided by the
    enclosing scope.  It is joined onto the current working directory.
    """
    import pandas as pd
    import pathlib

    # Build the path with pathlib rather than a hand-written '\' separator:
    # a lone backslash before the closing quote escapes the quote and is a
    # syntax error.
    full_path = pathlib.Path().absolute() / new_loc
    df = pd.read_excel(full_path, header=None, sheet_name='GTL | GWL Disclosures')
    print(df)
    # duplicate the rows: ignore_index=True renumbers the result 0..2n-1
    dup_df = pd.concat([df, df], ignore_index=True)
    # using openpyxl
    # Only this single frame is written; the other sheets of the source
    # workbook are not carried over.
    with pd.ExcelWriter('path_to_file.xlsx') as writer:
        dup_df.to_excel(writer)
但这只会写回一个 sheet 而不是原始工作簿
复制数据帧中行的一种方法是将数据帧与其自身连接:
pd.concat([df, df])
如果您想重置索引
pd.concat([df, df], ignore_index=True)
以你的例子为例:
def DuplicateEachRow(self):
    """Write a copy of the workbook at ``new_loc`` with each sheet's rows doubled.

    Every sheet is read without a header row, concatenated with itself (so
    the copied rows follow the originals) and written to
    ``'path_to_file.xlsx'`` under its original sheet name.

    ``new_loc`` is not defined in this snippet; it must be provided by the
    enclosing scope.  It is joined onto the current working directory.
    """
    import pandas as pd
    import pathlib

    # Build the path with pathlib rather than a hand-written '\' separator:
    # a lone backslash before the closing quote escapes the quote and is a
    # syntax error.
    full_path = pathlib.Path().absolute() / new_loc
    # sheet_name=None returns a dict {sheet name: DataFrame}, not a single
    # DataFrame, so pd.concat([df, df]) cannot be applied to the result
    # directly; each sheet is duplicated on its own instead.
    sheets = pd.read_excel(full_path, header=None, sheet_name=None)
    # using openpyxl
    with pd.ExcelWriter('path_to_file.xlsx') as writer:
        for name, df in sheets.items():
            # duplicate the rows:
            dup_df = pd.concat([df, df], ignore_index=True)
            dup_df.to_excel(writer, sheet_name=name)
编辑:新版本在原始行之后插入每个重复的行并将新的sheet附加到同一文件
def DuplicateEachRow():
    """Add a sheet to ``new_loc`` in which every row appears twice in a row.

    Reads the sheet ``'GTL | GWL Disclosures'`` (no header row), prints it,
    doubles the rows so each copy sits directly after its original, and
    appends the result to the same workbook as a new sheet named
    ``'Sheet3'``.

    ``new_loc`` is not defined in this snippet; it must be provided by the
    enclosing scope.  It is joined onto the current working directory.
    """
    import pandas as pd
    import pathlib

    # Build the path with pathlib rather than a hand-written '\' separator:
    # a lone backslash before the closing quote escapes the quote and is a
    # syntax error.
    full_path = pathlib.Path().absolute() / new_loc
    df = pd.read_excel(full_path, header=None, sheet_name='GTL | GWL Disclosures')
    print(df)
    # duplicate the rows:
    # keep the index, so you can sort the rows after
    dup_df = pd.concat([df, df])
    # sort the rows by the index so you have the duplicate one just after the
    # initial one (rows sharing an index label are identical copies, so their
    # relative order does not matter)
    dup_df.sort_index(inplace=True)
    # using openpyxl
    # open the file in append mode
    # NOTE(review): append mode appears to need the openpyxl engine, i.e. an
    # .xlsx workbook -- confirm before using this on an .xls file.
    with pd.ExcelWriter(new_loc, mode='a') as writer:
        # use a new name for the new sheet
        # don't save the header (dataframe columns names) and index
        # (dataframe row names) in the new sheet
        dup_df.to_excel(writer, sheet_name='Sheet3', header=False, index=False)
编辑:
此选项将根据索引号打开工作簿,读入数据,将每一行加倍,然后把加倍后的行重新写入您指定的 sheet;如果您无法使用 openpyxl,这样就可以避免使用它。至于找不到 iloc
的问题,我不确定为什么会这样,因为它是 pandas 包的一个组成部分。也许可以尝试把 pandas 重新安装到最新版本;在我这里它是可以正常工作的。
from xlrd import open_workbook
from xlutils.copy import copy
import pandas as pd
# Read the sheet to duplicate; header=None keeps the first row as data.
df = pd.read_excel('yourxls.xls', sheet_name='your_sheet', header=None)
# Repeat every index label twice so each row is immediately followed by its
# copy.  This replaces a row-by-row DataFrame.append loop, which copies the
# whole frame on every call and no longer exists in pandas 2.0+.
df_new = df.loc[df.index.repeat(2)]

# Copy the existing workbook into a writable one so the other sheets are kept.
rb = open_workbook("yourxls.xls")
wb = copy(rb)
s = wb.get_sheet(1)  # get sheet by index number
# NOTE(review): index 1 is the second sheet -- confirm it is the same sheet
# as 'your_sheet' read above.

# tolist() turns NumPy scalars into plain Python values before they are
# handed to the sheet writer.
df_vals = df_new.values.tolist()
for i, row_vals in enumerate(df_vals):
    for j, cell in enumerate(row_vals):
        s.write(i, j, cell)
wb.save('yourxls.xls')
我确实需要先 pip install 'xlrd' 和 'xlwt',才能使用 .xls 格式(上面的代码还导入了 'xlutils')。
最好的方法是直接使用 openpyxl 执行此操作:从 sheet 的底部开始工作,插入一行并从其上方的行复制值。