Python 创建新的数据框分组和汇总列
Python creating new dataframe grouping and summarizing column
我有下面这条 SQL 查询,它会创建一张新表,按用户 ID 汇总各访问类型的访问次数以及总访问次数。如何在 Python 中用数据框(DataFrame)得到相同的结果?
-- Per-user visit summary: one output row per user_id.
CREATE TABLE User_Visits_summary AS
SELECT
    user_id,
    -- Conditional aggregation: a row of the given type adds 1, any other row adds 0.
    SUM(CASE visit_type WHEN 1 THEN 1 ELSE 0 END) AS Type_One_Counts,
    SUM(CASE visit_type WHEN 2 THEN 1 ELSE 0 END) AS Type_Two_Counts,
    SUM(CASE visit_type WHEN 3 THEN 1 ELSE 0 END) AS Type_Three_Counts,
    -- Every row for the user, whatever its visit_type.
    COUNT(*) AS Total_Visits
FROM user_visits
GROUP BY user_id
下面的代码应当生成与上述 SQL 查询相同的表。请阅读代码中的注释,并在调试模式下逐行执行,以便更好地理解每一行代码的作用。有关 Pandas 函数的实用参考,请查看这份备忘单:
https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
import pandas as pd
# Example dataset: one row per visit.
user_visits = pd.DataFrame({'user_id':    ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
                            'visit_type': [1, 1, 3, 3, 3, 2, 2, 2, 2]})

# Output column for each visit_type that the SQL query counts.
visit_type_columns = {1: 'Type_One_Counts',
                      2: 'Type_Two_Counts',
                      3: 'Type_Three_Counts'}

# Equivalent of "case when visit_type = N then 1 else 0 end": one 0/1 flag column
# per visit type. Building the flags from the fixed mapping (instead of pivoting the
# values that happen to occur in the data) guarantees that all three columns exist
# and that a user with no visits of a given type gets 0 rather than NaN.
visit_flags = pd.DataFrame({column: (user_visits['visit_type'] == visit_type).astype(int)
                            for visit_type, column in visit_type_columns.items()})

# Equivalent of "count(*)": every row contributes 1, whatever its visit_type.
visit_flags['Total_Visits'] = 1

# Equivalent of "group by user_id" with sum() applied to every flag column;
# reset_index() turns the user_id group key back into an ordinary column.
User_Visits_summary = visit_flags.groupby(user_visits['user_id']).sum().reset_index()

# Table ready
print(User_Visits_summary)
# ...too wide to paste...
我有下面这条 SQL 查询,它会创建一张新表,按用户 ID 汇总各访问类型的访问次数以及总访问次数。如何在 Python 中用数据框(DataFrame)得到相同的结果?
-- Per-user visit summary: one output row per user_id.
CREATE TABLE User_Visits_summary AS
SELECT
    user_id,
    -- Conditional aggregation: a row of the given type adds 1, any other row adds 0.
    SUM(CASE visit_type WHEN 1 THEN 1 ELSE 0 END) AS Type_One_Counts,
    SUM(CASE visit_type WHEN 2 THEN 1 ELSE 0 END) AS Type_Two_Counts,
    SUM(CASE visit_type WHEN 3 THEN 1 ELSE 0 END) AS Type_Three_Counts,
    -- Every row for the user, whatever its visit_type.
    COUNT(*) AS Total_Visits
FROM user_visits
GROUP BY user_id
下面的代码应当生成与上述 SQL 查询相同的表。请阅读代码中的注释,并在调试模式下逐行执行,以便更好地理解每一行代码的作用。有关 Pandas 函数的实用参考,请查看这份备忘单:
https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
import pandas as pd
# Example dataset: one row per visit.
user_visits = pd.DataFrame({'user_id':    ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
                            'visit_type': [1, 1, 3, 3, 3, 2, 2, 2, 2]})

# Output column for each visit_type that the SQL query counts.
visit_type_columns = {1: 'Type_One_Counts',
                      2: 'Type_Two_Counts',
                      3: 'Type_Three_Counts'}

# Equivalent of "case when visit_type = N then 1 else 0 end": one 0/1 flag column
# per visit type. Building the flags from the fixed mapping (instead of pivoting the
# values that happen to occur in the data) guarantees that all three columns exist
# and that a user with no visits of a given type gets 0 rather than NaN.
visit_flags = pd.DataFrame({column: (user_visits['visit_type'] == visit_type).astype(int)
                            for visit_type, column in visit_type_columns.items()})

# Equivalent of "count(*)": every row contributes 1, whatever its visit_type.
visit_flags['Total_Visits'] = 1

# Equivalent of "group by user_id" with sum() applied to every flag column;
# reset_index() turns the user_id group key back into an ordinary column.
User_Visits_summary = visit_flags.groupby(user_visits['user_id']).sum().reset_index()

# Table ready
print(User_Visits_summary)
# ...too wide to paste...