使用 dplyr 连接组内的所有行
Concatenating all rows within a group using dplyr
假设我有这样一个数据框:
hand_id card_id card_name card_class
A 1 p alpha
A 2 q beta
A 3 r theta
B 2 q beta
B 3 r theta
B 4 s gamma
C 1 p alpha
C 2 q beta
我想将 card_id、card_name 和 card_class 连接成每一手级别 A、B、C 的一行。所以结果看起来像这样:
hand_id combo_1 combo_2 combo_3
A 1-2-3 p-q-r alpha-beta-theta
B 2-3-4 q-r-s beta-theta-gamma
....
我尝试使用 group_by 和变异来做到这一点,但我似乎无法让它工作
data <- read_csv('data.csv')
byHand <- group_by(data, hand_id) %>%
mutate(combo_1 = paste(card_id),
combo_2 = paste(card_name),
combo_3 = paste(card_class))
感谢您的帮助。
不太熟悉 dplyr
...所以这是我在没有 dplyr
的情况下的尝试
df <- read_csv('data.csv')
res <- lapply(split(df, df$hand_id),function(x){
sL <- apply(x[,-1], 2, function(y) paste(y, collapse = "-"))
d <- data.frame(x$hand_id[1], rbind(sL))
names(d) <- c("hand_id", "combo_1", "combo_2", "combo_3")
return(d)
})
res <- do.call("rbind",res)
rownames(res) <- NULL
这是输出:
## hand_id combo_1 combo_2 combo_3
## 1 A 1-2-3 p-q-r alpha-beta-theta
## 2 B 2-3-4 q-r-s beta-theta-gamma
## 3 C 1-2 p-q alpha-beta
你有点接近!
library(tidyr)
library(dplyr)
data <- read_csv('data.csv')
byHand <- group_by(data, hand_id) %>%
summarise(combo_1 = paste(card_id, collapse = "-"),
combo_2 = paste(card_name, collapse = "-"),
combo_3 = paste(card_class, collapse = "-"))
或使用summarise_each
:
byHand <- group_by(data, hand_id) %>%
summarise_each(funs(paste(., collapse = "-")))
这是另一个使用 data.table
的选项
library(data.table)
setDT(data)[, lapply(.SD, paste, collapse="-") , by = hand_id]
# hand_id card_id card_name card_class
#1: A 1-2-3 p-q-r alpha-beta-theta
#2: B 2-3-4 q-r-s beta-theta-gamma
#3: C 1-2 p-q alpha-beta
如果您的数据中有 NA,您可以使用 na.omit()
与 str_c()
内联。 unique()
如果您只想要独特的,也可以使用。
数据:
hand_id card_id card_name card_class
<chr> <dbl> <chr> <chr>
1 A 1 p alpha
2 A 2 q beta
3 A 3 r theta
4 A NA NA NA
5 B 2 q beta
6 B 3 r theta
7 B 4 s gamma
8 C 1 p alpha
9 C 2 q beta
代码:
data %>%
group_by(hand_id) %>%
summarize(card_id = str_c(na.omit(card_id), collapse = "-"),
card_name = str_c(na.omit(card_name), collapse = "-"),
card_class = str_c(na.omit(card_class), collapse = "-"))
输出:
hand_id card_id card_name card_class
* <chr> <chr> <chr> <chr>
1 A 1-2-3 p-q-r alpha-beta-the…
2 B 2-3-4 q-r-s beta-theta-gam…
3 C 1-2 p-q alpha-beta
假设我有这样一个数据框:
hand_id card_id card_name card_class
A 1 p alpha
A 2 q beta
A 3 r theta
B 2 q beta
B 3 r theta
B 4 s gamma
C 1 p alpha
C 2 q beta
我想将 card_id、card_name 和 card_class 连接成每一手级别 A、B、C 的一行。所以结果看起来像这样:
hand_id combo_1 combo_2 combo_3
A 1-2-3 p-q-r alpha-beta-theta
B 2-3-4 q-r-s beta-theta-gamma
....
我尝试使用 group_by 和变异来做到这一点,但我似乎无法让它工作
data <- read_csv('data.csv')
byHand <- group_by(data, hand_id) %>%
mutate(combo_1 = paste(card_id),
combo_2 = paste(card_name),
combo_3 = paste(card_class))
感谢您的帮助。
不太熟悉 dplyr
...所以这是我在没有 dplyr
df <- read_csv('data.csv')
res <- lapply(split(df, df$hand_id),function(x){
sL <- apply(x[,-1], 2, function(y) paste(y, collapse = "-"))
d <- data.frame(x$hand_id[1], rbind(sL))
names(d) <- c("hand_id", "combo_1", "combo_2", "combo_3")
return(d)
})
res <- do.call("rbind",res)
rownames(res) <- NULL
这是输出:
## hand_id combo_1 combo_2 combo_3
## 1 A 1-2-3 p-q-r alpha-beta-theta
## 2 B 2-3-4 q-r-s beta-theta-gamma
## 3 C 1-2 p-q alpha-beta
你有点接近!
library(tidyr)
library(dplyr)
data <- read_csv('data.csv')
byHand <- group_by(data, hand_id) %>%
summarise(combo_1 = paste(card_id, collapse = "-"),
combo_2 = paste(card_name, collapse = "-"),
combo_3 = paste(card_class, collapse = "-"))
或使用summarise_each
:
byHand <- group_by(data, hand_id) %>%
summarise_each(funs(paste(., collapse = "-")))
这是另一个使用 data.table
library(data.table)
setDT(data)[, lapply(.SD, paste, collapse="-") , by = hand_id]
# hand_id card_id card_name card_class
#1: A 1-2-3 p-q-r alpha-beta-theta
#2: B 2-3-4 q-r-s beta-theta-gamma
#3: C 1-2 p-q alpha-beta
如果您的数据中有 NA,您可以使用 na.omit()
与 str_c()
内联。 unique()
如果您只想要独特的,也可以使用。
数据:
hand_id card_id card_name card_class
<chr> <dbl> <chr> <chr>
1 A 1 p alpha
2 A 2 q beta
3 A 3 r theta
4 A NA NA NA
5 B 2 q beta
6 B 3 r theta
7 B 4 s gamma
8 C 1 p alpha
9 C 2 q beta
代码:
data %>%
group_by(hand_id) %>%
summarize(card_id = str_c(na.omit(card_id), collapse = "-"),
card_name = str_c(na.omit(card_name), collapse = "-"),
card_class = str_c(na.omit(card_class), collapse = "-"))
输出:
hand_id card_id card_name card_class
* <chr> <chr> <chr> <chr>
1 A 1-2-3 p-q-r alpha-beta-the…
2 B 2-3-4 q-r-s beta-theta-gam…
3 C 1-2 p-q alpha-beta