如何复制一行但在 Pandas 中交换值
How to duplicate a row but with values swapped in Pandas
我有一个数据框,我想复制其中的每一行,并在副本中交换 Team1 与 Team2 的值。原始数据框为 `df1`,期望的结果为 `df2`:
df1
GameID Team1ID Team2ID Team1Abb Team2Abb Team1PTS Team2PTS
0 1001 1023 TOR ATL 99 86
1 1004 1003 DAL POR 102 100
df2
GameID Team1ID Team2ID Team1Abb Team2Abb Team1PTS Team2PTS
0 1001 1023 TOR ATL 99 86
0 1023 1001 ATL TOR 86 99
1 1004 1003 DAL POR 102 100
1 1003 1004 POR DAL 100 102
使用 `rename` 和 `append`
我们可以创建一个映射字典来重命名数据框的列:把 `Team1` 列名映射到 `Team2` 列名,反之亦然,然后把重命名后的数据框附加到原数据框之后,从而复制出交换后的行。
import pandas as pd

# Pair every Team1* column label with its Team2* counterpart.
c1 = df.filter(like='Team1').columns
c2 = c1.str.replace('Team1', 'Team2')
# Rename in both directions at once so the two groups of labels trade places.
swap = df.rename(columns={**dict(zip(c1, c2)), **dict(zip(c2, c1))})
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# concat aligns columns by label, so the swapped values land under the
# original headers. mergesort is stable, which keeps each original row ahead
# of its swapped copy when the duplicated index labels are sorted.
pd.concat([df, swap]).sort_index(kind='mergesort', ignore_index=True)
GameID Team1ID Team2ID Team1Abb Team2Abb Team1PTS Team2PTS
0 0 1001 1023 TOR ATL 99 86
1 0 1023 1001 ATL TOR 86 99
2 1 1004 1003 DAL POR 102 100
3 1 1003 1004 POR DAL 100 102
另一个选项:使用 `re.sub` 重命名列:
import re
# Swap the team digit in a column label: Team1* <-> Team2*.
def swappat(m):
    """Return '2' when the matched digit is '1', otherwise '1'."""
    if m[0] == '1':
        return '2'
    return '1'


def swapteam(col):
    """Return *col* with the digit right after a leading 'Team' flipped.

    Labels that do not start with 'Team1' or 'Team2' are returned unchanged.
    """
    return re.sub('(?<=^Team)[12]', swappat, col)
import pandas as pd

# Stack the original frame on top of a copy whose Team1*/Team2* labels are
# swapped. DataFrame.append was removed in pandas 2.0, so pd.concat is used;
# it aligns columns by label. mergesort is stable, so each original row stays
# ahead of its swapped copy.
pd.concat([df, df.rename(columns=swapteam)]).sort_index(kind='mergesort')
GameID Team1ID Team2ID Team1Abb Team2Abb Team1PTS Team2PTS
0 0 1001 1023 TOR ATL 99 86
0 0 1023 1001 ATL TOR 86 99
1 1 1004 1003 DAL POR 102 100
1 1 1003 1004 POR DAL 100 102
# Alternative approach: build a frame in which every adjacent pair of value
# columns (Team1X / Team2X) has traded places, then add it to the original
# data frame.
def swapAdjList(df):
    """Return a copy of *df* with each adjacent pair of non-key columns swapped.

    The first column (GameID in the example) is treated as the key and stays
    in place. The remaining columns are taken two at a time and the values of
    each pair trade places; a trailing unpaired column is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column is the key, followed by paired columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same column labels as *df*, the swapped values,
        and a fresh 0..n-1 index. *df* itself is not modified.
    """
    n_cols = len(df.columns)
    if n_cols == 0:
        # Nothing to swap; still hand back a new, re-indexed frame.
        return df.reset_index(drop=True)

    # Positional column order: key first, then every pair reversed.
    order = [0]
    for k in range(1, n_cols - 1, 2):
        order.extend((k + 1, k))
    if len(order) < n_cols:
        # Odd number of value columns: the last one has no partner.
        order.append(n_cols - 1)

    # Reorder the values by position in a single step, then restore the
    # original labels so the swapped values sit under the original headers.
    swapped = df.iloc[:, order].reset_index(drop=True)
    swapped.columns = df.columns
    return swapped
# Demo: build the sample frame, generate the swapped rows and interleave them.
import pandas as pd

data = [[0, 1001, 1023, 'TOR', 'ATL', 99, 86], [1, 1004, 1003, 'DAL', 'POR', 102, 100]]
df = pd.DataFrame(data, columns=['GameID', 'Team1ID', 'Team2ID', 'Team1Abb', 'Team2Abb', 'Team1PTS', 'Team2PTS'])
df1 = swapAdjList(df)
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
df = pd.concat([df, df1])
# Both frames carry index labels 0..n-1, so sorting by index puts each swapped
# row next to its original; mergesort is stable and keeps the original first.
df.sort_index(kind='mergesort')
实现预期结果的更简单的逻辑
import pandas as pd

df2 = df1.copy()
# Relabel the copy with every Team1*/Team2* pair exchanged. The labels must
# match the original names exactly ("Team1Abb", not "Team1Ab"); otherwise the
# concatenation creates a stray column and leaves NaN under Team1Abb.
df2.columns = ["GameID", "Team2ID", "Team1ID", "Team2Abb", "Team1Abb", "Team2PTS", "Team1PTS"]
# DataFrame.append was removed in pandas 2.0; pd.concat aligns columns by
# label. mergesort is stable, so each original row precedes its swapped copy.
pd.concat([df1, df2]).sort_index(kind='mergesort', ignore_index=True)
我有一个数据框,我想复制其中的每一行,并在副本中交换 Team1 与 Team2 的值。原始数据框为 `df1`,期望的结果为 `df2`:
df1
GameID Team1ID Team2ID Team1Abb Team2Abb Team1PTS Team2PTS
0 1001 1023 TOR ATL 99 86
1 1004 1003 DAL POR 102 100
df2
GameID Team1ID Team2ID Team1Abb Team2Abb Team1PTS Team2PTS
0 1001 1023 TOR ATL 99 86
0 1023 1001 ATL TOR 86 99
1 1004 1003 DAL POR 102 100
1 1003 1004 POR DAL 100 102
使用 `rename` 和 `append`
我们可以创建一个映射字典来重命名数据框的列:把 `Team1` 列名映射到 `Team2` 列名,反之亦然,然后把重命名后的数据框附加到原数据框之后,从而复制出交换后的行。
import pandas as pd

# Pair every Team1* column label with its Team2* counterpart.
c1 = df.filter(like='Team1').columns
c2 = c1.str.replace('Team1', 'Team2')
# Rename in both directions at once so the two groups of labels trade places.
swap = df.rename(columns={**dict(zip(c1, c2)), **dict(zip(c2, c1))})
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# concat aligns columns by label, so the swapped values land under the
# original headers. mergesort is stable, which keeps each original row ahead
# of its swapped copy when the duplicated index labels are sorted.
pd.concat([df, swap]).sort_index(kind='mergesort', ignore_index=True)
GameID Team1ID Team2ID Team1Abb Team2Abb Team1PTS Team2PTS
0 0 1001 1023 TOR ATL 99 86
1 0 1023 1001 ATL TOR 86 99
2 1 1004 1003 DAL POR 102 100
3 1 1003 1004 POR DAL 100 102
另一个选项:使用 `re.sub` 重命名列:
import re
# Swap the team digit in a column label: Team1* <-> Team2*.
def swappat(m):
    """Return '2' when the matched digit is '1', otherwise '1'."""
    if m[0] == '1':
        return '2'
    return '1'


def swapteam(col):
    """Return *col* with the digit right after a leading 'Team' flipped.

    Labels that do not start with 'Team1' or 'Team2' are returned unchanged.
    """
    return re.sub('(?<=^Team)[12]', swappat, col)
import pandas as pd

# Stack the original frame on top of a copy whose Team1*/Team2* labels are
# swapped. DataFrame.append was removed in pandas 2.0, so pd.concat is used;
# it aligns columns by label. mergesort is stable, so each original row stays
# ahead of its swapped copy.
pd.concat([df, df.rename(columns=swapteam)]).sort_index(kind='mergesort')
GameID Team1ID Team2ID Team1Abb Team2Abb Team1PTS Team2PTS
0 0 1001 1023 TOR ATL 99 86
0 0 1023 1001 ATL TOR 86 99
1 1 1004 1003 DAL POR 102 100
1 1 1003 1004 POR DAL 100 102
# Alternative approach: build a frame in which every adjacent pair of value
# columns (Team1X / Team2X) has traded places, then add it to the original
# data frame.
def swapAdjList(df):
    """Return a copy of *df* with each adjacent pair of non-key columns swapped.

    The first column (GameID in the example) is treated as the key and stays
    in place. The remaining columns are taken two at a time and the values of
    each pair trade places; a trailing unpaired column is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column is the key, followed by paired columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same column labels as *df*, the swapped values,
        and a fresh 0..n-1 index. *df* itself is not modified.
    """
    n_cols = len(df.columns)
    if n_cols == 0:
        # Nothing to swap; still hand back a new, re-indexed frame.
        return df.reset_index(drop=True)

    # Positional column order: key first, then every pair reversed.
    order = [0]
    for k in range(1, n_cols - 1, 2):
        order.extend((k + 1, k))
    if len(order) < n_cols:
        # Odd number of value columns: the last one has no partner.
        order.append(n_cols - 1)

    # Reorder the values by position in a single step, then restore the
    # original labels so the swapped values sit under the original headers.
    swapped = df.iloc[:, order].reset_index(drop=True)
    swapped.columns = df.columns
    return swapped
# Demo: build the sample frame, generate the swapped rows and interleave them.
import pandas as pd

data = [[0, 1001, 1023, 'TOR', 'ATL', 99, 86], [1, 1004, 1003, 'DAL', 'POR', 102, 100]]
df = pd.DataFrame(data, columns=['GameID', 'Team1ID', 'Team2ID', 'Team1Abb', 'Team2Abb', 'Team1PTS', 'Team2PTS'])
df1 = swapAdjList(df)
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
df = pd.concat([df, df1])
# Both frames carry index labels 0..n-1, so sorting by index puts each swapped
# row next to its original; mergesort is stable and keeps the original first.
df.sort_index(kind='mergesort')
实现预期结果的更简单的逻辑
import pandas as pd

df2 = df1.copy()
# Relabel the copy with every Team1*/Team2* pair exchanged. The labels must
# match the original names exactly ("Team1Abb", not "Team1Ab"); otherwise the
# concatenation creates a stray column and leaves NaN under Team1Abb.
df2.columns = ["GameID", "Team2ID", "Team1ID", "Team2Abb", "Team1Abb", "Team2PTS", "Team1PTS"]
# DataFrame.append was removed in pandas 2.0; pd.concat aligns columns by
# label. mergesort is stable, so each original row precedes its swapped copy.
pd.concat([df1, df2]).sort_index(kind='mergesort', ignore_index=True)