可视化面对面记录的最佳方式是什么?
What is the best way to visualize the head-to-head records?
我的数据框如下所示:
A = pd.DataFrame({'team':[1,2,3,2,1,1,3,4,1,2], 'opp_team':[2,1,2,3,3,4,1,1,2,1], 'result':[1,0,1,0,1,1,0,0,1,0]})
结果栏有1代表胜利,0代表失败。我想找出在图表上呈现头对头记录的最佳方式。
我想了个pair plot,但是我觉得不行,因为它不会显示胜负数。在上面的例子中,1 队与 2 队交手 2 次,并且都赢了。因此,图表也应该显示计数。
我可以尝试解决这个问题吗?
只需将它们显示在 2 个单独的图中:一个显示 head-to-head 记录的总场数,另一个显示一支球队对另一支球队的总胜率(另一支球队的胜率 = 该球队的负率)。
为此,我认为需要重组数据框,使其每行仅对应 1 个比赛 ID。为了便于分组,对 `team` 和 `opp_team` 进行排序,使 `team` 的索引始终小于 `opp_team` 的索引。
我生成了一个示例数据集,并演示了我会如何汇总和绘制它,供您参考:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def generate_data(num_data=100, num_teams=4):
    """Generate random head-to-head match records.

    Args:
        num_data: Number of matches to generate.
        num_teams: Number of distinct teams; team ids are drawn from
            ``1..num_teams`` inclusive.

    Returns:
        A tuple ``(team, opp_team, results)`` of three NumPy arrays, each of
        length ``num_data``. ``team`` and ``opp_team`` hold integer team ids
        that differ at every position; ``results`` holds ``0.0`` or ``1.0``.

    Raises:
        ValueError: If matches are requested with fewer than two teams, since
            no team could then be paired with a different opponent.
    """
    if num_data > 0 and num_teams < 2:
        raise ValueError(
            'num_teams must be at least 2 so every team can face a '
            'different opponent'
        )

    team = np.random.randint(1, 1 + num_teams, num_data)
    opp_team = np.random.randint(1, 1 + num_teams, num_data)

    # Redraw only the positions where a team was paired with itself, and
    # repeat until no such pairing remains.
    self_match = opp_team == team
    while self_match.any():
        opp_team[self_match] = np.random.randint(
            1, 1 + num_teams, int(self_match.sum())
        )
        self_match = opp_team == team

    # Size the outcomes by num_data (previously hard-coded to 100, which
    # produced mismatched array lengths for any other num_data).
    results = np.round(np.random.rand(num_data))
    return team, opp_team, results
def reorder_team(team, opp_team, result):
    """Put a match record into canonical order (smaller team first).

    Args:
        team: Identifier of the team the result is reported for.
        opp_team: Identifier of the opposing team.
        result: 1 if ``team`` won, 0 if ``team`` lost.

    Returns:
        A tuple ``(team, opp_team, result)`` in which ``team`` is not greater
        than ``opp_team``. When the two teams are swapped, ``result`` is
        flipped so that it still describes the first team in the tuple.
    """
    if team > opp_team:
        team, opp_team = opp_team, team
        # The perspective changed sides, so a win becomes a loss and vice versa.
        result = 1 - result
    return team, opp_team, result
# Generate data and get summary by team match-ups
team, opp_team, results = generate_data()
df = pd.DataFrame(
    data={'team': team, 'opp_team': opp_team, 'results': results},
    dtype='int',
)

# Rewrite every match with the smaller team id first, so that "A vs B" and
# "B vs A" end up in the same group below.
canonical_rows = [
    reorder_team(*row) for row in df.itertuples(index=False, name=None)
]
df = pd.DataFrame(canonical_rows, columns=df.columns)

# Turn the numeric ids into display labels such as "Team 1".
for col in ('team', 'opp_team'):
    df[col] = 'Team ' + df[col].astype(str)

# One row per match-up: wins of the first team and number of games played.
df_summary = (
    df.groupby(['team', 'opp_team'])['results']
    .agg(wins='sum', total='count')
    .reset_index()
)
win_share = df_summary['wins'] / df_summary['total']
df_summary['team_winrate'] = win_share
df_summary['opp_team_winrate'] = 1 - win_share
产生:
您可以使用以下脚本绘制它们(或使用您喜欢的库编写自己的脚本):
# Two side-by-side panels: match-up counts on the left, win rates on the right.
fig, (ax_count, ax_win) = plt.subplots(1, 2, figsize=(12, 6))
y_locs = list(range(len(df_summary)))

# --- Left panel: total number of games per match-up -------------------------
ax_count.barh(y_locs, width=df_summary['total'], color='tab:gray')
ax_count.set_yticks(y_locs)
ax_count.set_yticklabels(df_summary['team'] + ' VS ' + df_summary['opp_team'])
ax_count.set_title('Total No. of Match Ups')
ax_count.set_xticks([])
for loc in ['top', 'left', 'right', 'bottom']:
    ax_count.spines[loc].set_visible(False)

# Write each count just inside the right end of its bar.
for p in ax_count.patches:
    ax_count.annotate(f'{p.get_width()}',
                      (p.get_x() + p.get_width(), p.get_y() + p.get_height()/2.),
                      ha='right', va='center', xytext=(-5, 0), textcoords='offset points',
                      color='white', fontweight='heavy')

# --- Right panel: stacked win rates -----------------------------------------
# The first team's share is drawn from 0; the opponent's share is stacked
# after it on a twin axes so the opponent names can be used as tick labels
# on the right-hand side.
ax_win.barh(y_locs, width=df_summary['team_winrate'], color='tab:blue')
ax_win2 = ax_win.twinx()
ax_win2.barh(y_locs, width=df_summary['opp_team_winrate'],
             left=df_summary['team_winrate'], color='tab:red')
ax_win.set_yticks(y_locs)
ax_win.set_yticklabels(df_summary['team'])
ax_win2.set_yticks(y_locs)
ax_win2.set_yticklabels(df_summary['opp_team'])
ax_win.set_xlim(0, 1)
ax_win.set_title('Winning Rate')
ax_win.set_xticks([])
for loc in ['top', 'left', 'right', 'bottom']:
    ax_win.spines[loc].set_visible(False)
    ax_win2.spines[loc].set_visible(False)

# Label both ends of every row: the first team's percentage at the left edge
# and the complementary percentage for the opponent at the right edge.
for p in ax_win.patches:
    ax_win.annotate(f'{p.get_width() * 100 :.0f} %',
                    (0, p.get_y() + p.get_height()/2.),
                    ha='left', va='center', xytext=(10, 0), textcoords='offset points',
                    color='white', fontweight='heavy')
    ax_win2.annotate(f'{(1 - p.get_width()) * 100 :.0f} %',
                     (1, p.get_y() + p.get_height()/2.),
                     ha='right', va='center', xytext=(-10, 0), textcoords='offset points',
                     color='white', fontweight='heavy')
plt.show()
我的数据框如下所示:
A = pd.DataFrame({'team':[1,2,3,2,1,1,3,4,1,2], 'opp_team':[2,1,2,3,3,4,1,1,2,1], 'result':[1,0,1,0,1,1,0,0,1,0]})
结果栏有1代表胜利,0代表失败。我想找出在图表上呈现头对头记录的最佳方式。
我想了个pair plot,但是我觉得不行,因为它不会显示胜负数。在上面的例子中,1 队与 2 队交手 2 次,并且都赢了。因此,图表也应该显示计数。
我可以尝试解决这个问题吗?
只需将它们显示在 2 个单独的图中:一个显示 head-to-head 记录的总场数,另一个显示一支球队对另一支球队的总胜率(另一支球队的胜率 = 该球队的负率)。
为此,我认为需要重组数据框,使其每行仅对应 1 个比赛 ID。为了便于分组,对 `team` 和 `opp_team` 进行排序,使 `team` 的索引始终小于 `opp_team` 的索引。
我生成了一个示例数据集,并演示了我会如何汇总和绘制它,供您参考:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def generate_data(num_data=100, num_teams=4):
    """Generate random head-to-head match records.

    Args:
        num_data: Number of matches to generate.
        num_teams: Number of distinct teams; team ids are drawn from
            ``1..num_teams`` inclusive.

    Returns:
        A tuple ``(team, opp_team, results)`` of three NumPy arrays, each of
        length ``num_data``. ``team`` and ``opp_team`` hold integer team ids
        that differ at every position; ``results`` holds ``0.0`` or ``1.0``.

    Raises:
        ValueError: If matches are requested with fewer than two teams, since
            no team could then be paired with a different opponent.
    """
    if num_data > 0 and num_teams < 2:
        raise ValueError(
            'num_teams must be at least 2 so every team can face a '
            'different opponent'
        )

    team = np.random.randint(1, 1 + num_teams, num_data)
    opp_team = np.random.randint(1, 1 + num_teams, num_data)

    # Redraw only the positions where a team was paired with itself, and
    # repeat until no such pairing remains.
    self_match = opp_team == team
    while self_match.any():
        opp_team[self_match] = np.random.randint(
            1, 1 + num_teams, int(self_match.sum())
        )
        self_match = opp_team == team

    # Size the outcomes by num_data (previously hard-coded to 100, which
    # produced mismatched array lengths for any other num_data).
    results = np.round(np.random.rand(num_data))
    return team, opp_team, results
def reorder_team(team, opp_team, result):
    """Put a match record into canonical order (smaller team first).

    Args:
        team: Identifier of the team the result is reported for.
        opp_team: Identifier of the opposing team.
        result: 1 if ``team`` won, 0 if ``team`` lost.

    Returns:
        A tuple ``(team, opp_team, result)`` in which ``team`` is not greater
        than ``opp_team``. When the two teams are swapped, ``result`` is
        flipped so that it still describes the first team in the tuple.
    """
    if team > opp_team:
        team, opp_team = opp_team, team
        # The perspective changed sides, so a win becomes a loss and vice versa.
        result = 1 - result
    return team, opp_team, result
# Generate data and get summary by team match-ups
team, opp_team, results = generate_data()
df = pd.DataFrame(
    data={'team': team, 'opp_team': opp_team, 'results': results},
    dtype='int',
)

# Rewrite every match with the smaller team id first, so that "A vs B" and
# "B vs A" end up in the same group below.
canonical_rows = [
    reorder_team(*row) for row in df.itertuples(index=False, name=None)
]
df = pd.DataFrame(canonical_rows, columns=df.columns)

# Turn the numeric ids into display labels such as "Team 1".
for col in ('team', 'opp_team'):
    df[col] = 'Team ' + df[col].astype(str)

# One row per match-up: wins of the first team and number of games played.
df_summary = (
    df.groupby(['team', 'opp_team'])['results']
    .agg(wins='sum', total='count')
    .reset_index()
)
win_share = df_summary['wins'] / df_summary['total']
df_summary['team_winrate'] = win_share
df_summary['opp_team_winrate'] = 1 - win_share
产生:
您可以使用以下脚本绘制它们(或使用您喜欢的库编写自己的脚本):
# Two side-by-side panels: match-up counts on the left, win rates on the right.
fig, (ax_count, ax_win) = plt.subplots(1, 2, figsize=(12, 6))
y_locs = list(range(len(df_summary)))

# --- Left panel: total number of games per match-up -------------------------
ax_count.barh(y_locs, width=df_summary['total'], color='tab:gray')
ax_count.set_yticks(y_locs)
ax_count.set_yticklabels(df_summary['team'] + ' VS ' + df_summary['opp_team'])
ax_count.set_title('Total No. of Match Ups')
ax_count.set_xticks([])
for loc in ['top', 'left', 'right', 'bottom']:
    ax_count.spines[loc].set_visible(False)

# Write each count just inside the right end of its bar.
for p in ax_count.patches:
    ax_count.annotate(f'{p.get_width()}',
                      (p.get_x() + p.get_width(), p.get_y() + p.get_height()/2.),
                      ha='right', va='center', xytext=(-5, 0), textcoords='offset points',
                      color='white', fontweight='heavy')

# --- Right panel: stacked win rates -----------------------------------------
# The first team's share is drawn from 0; the opponent's share is stacked
# after it on a twin axes so the opponent names can be used as tick labels
# on the right-hand side.
ax_win.barh(y_locs, width=df_summary['team_winrate'], color='tab:blue')
ax_win2 = ax_win.twinx()
ax_win2.barh(y_locs, width=df_summary['opp_team_winrate'],
             left=df_summary['team_winrate'], color='tab:red')
ax_win.set_yticks(y_locs)
ax_win.set_yticklabels(df_summary['team'])
ax_win2.set_yticks(y_locs)
ax_win2.set_yticklabels(df_summary['opp_team'])
ax_win.set_xlim(0, 1)
ax_win.set_title('Winning Rate')
ax_win.set_xticks([])
for loc in ['top', 'left', 'right', 'bottom']:
    ax_win.spines[loc].set_visible(False)
    ax_win2.spines[loc].set_visible(False)

# Label both ends of every row: the first team's percentage at the left edge
# and the complementary percentage for the opponent at the right edge.
for p in ax_win.patches:
    ax_win.annotate(f'{p.get_width() * 100 :.0f} %',
                    (0, p.get_y() + p.get_height()/2.),
                    ha='left', va='center', xytext=(10, 0), textcoords='offset points',
                    color='white', fontweight='heavy')
    ax_win2.annotate(f'{(1 - p.get_width()) * 100 :.0f} %',
                     (1, p.get_y() + p.get_height()/2.),
                     ha='right', va='center', xytext=(-10, 0), textcoords='offset points',
                     color='white', fontweight='heavy')
plt.show()