使用 group by 和 summarize 将新行添加到数据框
Adding new row to dataframe with group by and summarise
我想将我的 df 按三列分组并添加一个新行,该行将是第四列的总和。
我的数据看起来像
# Build the example data frame column by column.
# The vectors are passed straight to data.frame() so `id` and `value` stay
# numeric; wrapping them in cbind() first would go through a character matrix
# and turn every column into strings, which breaks any later sum().
fc <- c("F", "F", "E", "E", "TF", "TF")
group_code <- c("Egg_x", "Egg_y", "Egg_x", "Egg_y", "Egg_x", "Egg_y")
id <- c(1, 1, 1, 1, 1, 1)
value <- c(2, 21, 4, 3, 20, 15)
df <- data.frame(fc, group_code, id, value)
> df
fc group_code id value
1 F Egg_x 1 2
2 F Egg_y 1 21
3 E Egg_x 1 4
4 E Egg_y 1 3
5 TF Egg_x 1 20
6 TF Egg_y 1 15
在这个例子中,我想创建一个包含 Egg_x
和 Egg_y
的新组,我可以使用 df$main_group <- sub("_.*", "", df$group_code)
,我们有
> df
fc group_code id value main_group
1 F Egg_x 1 2 Egg
2 F Egg_y 1 21 Egg
3 E Egg_x 1 4 Egg
4 E Egg_y 1 3 Egg
5 TF Egg_x 1 20 Egg
6 TF Egg_y 1 15 Egg
我想为 fc
列的每个值添加一个新行:按 fc、id 和 main_group 分组,并计算 value 列的总和。
我的最终 df 应该是这样的:
> df
fc group_code id value main_group
1 F Egg_x 1 2 Egg
2 F Egg_y 1 21 Egg
3 F Egg 1 23 Egg
4 E Egg_x 1 4 Egg
5 E Egg_y 1 3 Egg
6 E Egg 1 7 Egg
7 TF Egg_x 1 20 Egg
8 TF Egg_y 1 15 Egg
9 TF Egg 1 35 Egg
在上面的 df 中,每组第三行的 value 是该组前两行 value 的总和。
谢谢
将您的数据框重塑为宽格式,然后创建一个新列Egg = Egg_x + Egg_y
并转换回长格式
library(tidyverse)
# Add an "Egg" total row per fc/id by reshaping:
#   1. widen so Egg_x and Egg_y become columns,
#   2. add Egg = Egg_x + Egg_y,
#   3. lengthen back to one row per (fc, id, group_code).
# pivot_wider()/pivot_longer() replace the superseded spread()/gather().
# NOTE(review): assumes `df` holds only fc, group_code, id and a numeric
# `value` column -- confirm against how `df` was built.
df %>%
  pivot_wider(names_from = group_code, values_from = value) %>%
  mutate(Egg = Egg_x + Egg_y) %>%
  pivot_longer(
    cols = -c(fc, id),
    names_to = "group_code",
    values_to = "value"
  ) %>%
  arrange(fc)
#> fc id group_code value
#> 1 E 1 Egg_x 4
#> 2 E 1 Egg_y 3
#> 3 E 1 Egg 7
#> 4 F 1 Egg_x 2
#> 5 F 1 Egg_y 21
#> 6 F 1 Egg 23
#> 7 TF 1 Egg_x 20
#> 8 TF 1 Egg_y 15
#> 9 TF 1 Egg 35
由 reprex package (v0.3.0)
于 2019-11-05 创建
首先,我们将创建一个包含摘要行的单独数据框 - df_sum
:
library(dplyr)
library(forcats)
# Example data as a tibble, plus `main_group`: the part of `group_code`
# before the first underscore (e.g. "Egg_x" -> "Egg").
df <-
  tibble(
    fc = c("F", "F", "E", "E", "TF", "TF"),
    group_code = c("Egg_x", "Egg_y", "Egg_x", "Egg_y", "Egg_x", "Egg_y"),
    id = c(1, 1, 1, 1, 1, 1),
    value = c(2, 21, 4, 3, 20, 15)
  ) %>%
  # "_" needs no escaping in a regex; "\_" is not a valid escape in an R
  # string literal and makes the script fail to parse.
  mutate(main_group = sub("_.*", "", group_code))
# One summary row per (fc, main_group, id) holding the total of `value`.
# `group_code` is filled with the main group name so the summary rows have
# the same columns as the original rows.
by_group <- group_by(df, fc, main_group, id)
group_totals <- summarise(by_group, value = sum(value))
df_sum <- mutate(group_totals, group_code = main_group)
df_sum
#> # A tibble: 3 x 5
#> # Groups: fc, main_group [3]
#> fc main_group id value group_code
#> <chr> <chr> <dbl> <dbl> <chr>
#> 1 E Egg 1 7 Egg
#> 2 F Egg 1 23 Egg
#> 3 TF Egg 1 35 Egg
然后我们绑定到原来的df上,然后排列
# Append the summary rows to the original rows, then sort so that each
# fc / main_group / id group lists its original rows followed by its total.
res <-
  bind_rows(df, df_sum) %>%
  # fct_inorder() orders the factor levels by first appearance
  # (Egg_x, Egg_y, Egg), so the summary rows sort after the original rows.
  mutate(group_code = fct_inorder(group_code)) %>%
  arrange(fc, main_group, id, group_code)
res
#> # A tibble: 9 x 5
#> fc group_code id value main_group
#> <chr> <fct> <dbl> <dbl> <chr>
#> 1 E Egg_x 1 4 Egg
#> 2 E Egg_y 1 3 Egg
#> 3 E Egg 1 7 Egg
#> 4 F Egg_x 1 2 Egg
#> 5 F Egg_y 1 21 Egg
#> 6 F Egg 1 23 Egg
#> 7 TF Egg_x 1 20 Egg
#> 8 TF Egg_y 1 15 Egg
#> 9 TF Egg 1 35 Egg
我想将我的 df 按三列分组并添加一个新行,该行将是第四列的总和。
我的数据看起来像
# Build the example data frame column by column.
# The vectors are passed straight to data.frame() so `id` and `value` stay
# numeric; wrapping them in cbind() first would go through a character matrix
# and turn every column into strings, which breaks any later sum().
fc <- c("F", "F", "E", "E", "TF", "TF")
group_code <- c("Egg_x", "Egg_y", "Egg_x", "Egg_y", "Egg_x", "Egg_y")
id <- c(1, 1, 1, 1, 1, 1)
value <- c(2, 21, 4, 3, 20, 15)
df <- data.frame(fc, group_code, id, value)
> df
fc group_code id value
1 F Egg_x 1 2
2 F Egg_y 1 21
3 E Egg_x 1 4
4 E Egg_y 1 3
5 TF Egg_x 1 20
6 TF Egg_y 1 15
在这个例子中,我想创建一个包含 Egg_x
和 Egg_y
的新组,我可以使用 df$main_group <- sub("_.*", "", df$group_code)
,我们有
> df
fc group_code id value main_group
1 F Egg_x 1 2 Egg
2 F Egg_y 1 21 Egg
3 E Egg_x 1 4 Egg
4 E Egg_y 1 3 Egg
5 TF Egg_x 1 20 Egg
6 TF Egg_y 1 15 Egg
我想为 fc
列的每个值添加一个新行:按 fc、id 和 main_group 分组,并计算 value 列的总和。
我的最终 df 应该是这样的:
> df
fc group_code id value main_group
1 F Egg_x 1 2 Egg
2 F Egg_y 1 21 Egg
3 F Egg 1 23 Egg
4 E Egg_x 1 4 Egg
5 E Egg_y 1 3 Egg
6 E Egg 1 7 Egg
7 TF Egg_x 1 20 Egg
8 TF Egg_y 1 15 Egg
9 TF Egg 1 35 Egg
在上面的 df 中,每组第三行的 value 是该组前两行 value 的总和。
谢谢
将您的数据框重塑为宽格式,然后创建一个新列Egg = Egg_x + Egg_y
并转换回长格式
library(tidyverse)
# Add an "Egg" total row per fc/id by reshaping:
#   1. widen so Egg_x and Egg_y become columns,
#   2. add Egg = Egg_x + Egg_y,
#   3. lengthen back to one row per (fc, id, group_code).
# pivot_wider()/pivot_longer() replace the superseded spread()/gather().
# NOTE(review): assumes `df` holds only fc, group_code, id and a numeric
# `value` column -- confirm against how `df` was built.
df %>%
  pivot_wider(names_from = group_code, values_from = value) %>%
  mutate(Egg = Egg_x + Egg_y) %>%
  pivot_longer(
    cols = -c(fc, id),
    names_to = "group_code",
    values_to = "value"
  ) %>%
  arrange(fc)
#> fc id group_code value
#> 1 E 1 Egg_x 4
#> 2 E 1 Egg_y 3
#> 3 E 1 Egg 7
#> 4 F 1 Egg_x 2
#> 5 F 1 Egg_y 21
#> 6 F 1 Egg 23
#> 7 TF 1 Egg_x 20
#> 8 TF 1 Egg_y 15
#> 9 TF 1 Egg 35
由 reprex package (v0.3.0)
于 2019-11-05 创建
首先,我们将创建一个包含摘要行的单独数据框 - df_sum
:
library(dplyr)
library(forcats)
# Example data as a tibble, plus `main_group`: the part of `group_code`
# before the first underscore (e.g. "Egg_x" -> "Egg").
df <-
  tibble(
    fc = c("F", "F", "E", "E", "TF", "TF"),
    group_code = c("Egg_x", "Egg_y", "Egg_x", "Egg_y", "Egg_x", "Egg_y"),
    id = c(1, 1, 1, 1, 1, 1),
    value = c(2, 21, 4, 3, 20, 15)
  ) %>%
  # "_" needs no escaping in a regex; "\_" is not a valid escape in an R
  # string literal and makes the script fail to parse.
  mutate(main_group = sub("_.*", "", group_code))
# One summary row per (fc, main_group, id) holding the total of `value`.
# `group_code` is filled with the main group name so the summary rows have
# the same columns as the original rows.
by_group <- group_by(df, fc, main_group, id)
group_totals <- summarise(by_group, value = sum(value))
df_sum <- mutate(group_totals, group_code = main_group)
df_sum
#> # A tibble: 3 x 5
#> # Groups: fc, main_group [3]
#> fc main_group id value group_code
#> <chr> <chr> <dbl> <dbl> <chr>
#> 1 E Egg 1 7 Egg
#> 2 F Egg 1 23 Egg
#> 3 TF Egg 1 35 Egg
然后我们绑定到原来的df上,然后排列
# Append the summary rows to the original rows, then sort so that each
# fc / main_group / id group lists its original rows followed by its total.
res <-
  bind_rows(df, df_sum) %>%
  # fct_inorder() orders the factor levels by first appearance
  # (Egg_x, Egg_y, Egg), so the summary rows sort after the original rows.
  mutate(group_code = fct_inorder(group_code)) %>%
  arrange(fc, main_group, id, group_code)
res
#> # A tibble: 9 x 5
#> fc group_code id value main_group
#> <chr> <fct> <dbl> <dbl> <chr>
#> 1 E Egg_x 1 4 Egg
#> 2 E Egg_y 1 3 Egg
#> 3 E Egg 1 7 Egg
#> 4 F Egg_x 1 2 Egg
#> 5 F Egg_y 1 21 Egg
#> 6 F Egg 1 23 Egg
#> 7 TF Egg_x 1 20 Egg
#> 8 TF Egg_y 1 15 Egg
#> 9 TF Egg 1 35 Egg