在 R 的 data.table 中根据多个条件创建列
Create a column based on multiple conditions in data.table in R
我希望使用 data.table 根据其他两个列条件创建一个新列。这是我的示例代码:
# Sample data: four groups of three rows each. Within every group the first
# row has an empty val1 and the remaining two rows carry "A".
group <- rep(c(1, 2, 3, 4), each = 3)
date <- c(6, 2, 3, 7, 6, 9, 7, 1, 4, 6, 8, 9)
val1 <- rep(c("", "A", "A"), times = 4)
# Keep both a plain data.frame and its data.table counterpart.
df1 <- data.frame(group, date, val1)
dt1 <- as.data.table(df1)
这是输出:
group date val1
1 6
1 2 A
1 3 A
2 7
2 6 A
2 9 A
3 7
3 1 A
3 4 A
4 6
4 8 A
4 9 A
我想在每个组 (1,2,3,4) 内,找出 val1 = "A" 的行中 date 的最小值,并将该行标记出来,如下所示:
group date val1 findmin
1 6
1 2 A Y
1 3 A
2 7
2 6 A Y
2 9 A
3 7
3 1 A Y
3 4 A
4 6
4 8 A Y
4 9 A
我试过了
# NOTE: this is the failing attempt quoted in the question, kept verbatim.
# The parenthesis of ifelse() closes right after its first argument, so
# "Y", "" and by = group are handed to `[` itself instead of to ifelse()
# (presumably "Y" then lands in keyby=, which would explain the error below).
# `date=` also names an argument rather than comparing with `==`.
dt1[,findmin:= ifelse(date=min(date[val1 == "A"])),"Y","", by = group]
含义是:如果 date 等于 val1 = "A" 的行中 date 的最小值,就在名为 'findmin' 的新列中填入 "Y",否则留空,并对每个组 (1,2,3,4) 分别执行此操作。我收到此错误:
Error in `[.data.table`(dt1, , `:=`(findmin, ifelse(min(date[val1 == "A"]))), :
Provide either by= or keyby= but not both
非常感谢您的帮助,谢谢!
你必须注意括号的位置,并且用 == 来检查相等性:
# Per group, flag with "Y" the row(s) whose val1 is "A" and whose date equals
# the smallest date among that group's "A" rows; every other row gets "".
# The explicit val1 == "A" test matters: without it, a non-"A" row that
# happens to share the minimum "A" date would be flagged as well.
dt1[, findmin := fifelse(val1 == "A" & date == min(date[val1 == "A"]), "Y", ""),
    by = group]
下面的代码使用 dplyr 也可以得到结果。我相信还有更优雅的方法可以做到这一点。
# Install dplyr and data.table only when they are missing, then attach them.
# requireNamespace() checks availability without attaching the package, so
# the attach happens exactly once, in the library() call, which stops with an
# error if the package still cannot be loaded.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)
# Sample data: four groups of three rows each. Within every group the first
# row has an empty val1 and the remaining two rows carry "A".
group <- rep(c(1, 2, 3, 4), each = 3)
date <- c(6, 2, 3, 7, 6, 9, 7, 1, 4, 6, 8, 9)
val1 <- rep(c("", "A", "A"), times = 4)
# Keep both a plain data.frame and its data.table counterpart.
df1 <- data.frame(group, date, val1)
dt1 <- as.data.table(df1)
# Keep only the rows whose val1 is "A".
df2 <- filter(df1, val1 == "A")
# Sort by date, take the first row of each group (its earliest "A" date),
# drop the grouping and tag that row with findmin = "Y".
df3 <- df2 %>%
  group_by(group) %>%
  arrange(date) %>%
  slice(1) %>%
  ungroup() %>%
  mutate(findmin = "Y")
# Attach the flag to the full data; rows without a match get NA in findmin.
df4 <- left_join(df1, df3, by = c("group", "date", "val1"))
# Turn the NA flags into empty strings.
df5 <- mutate(df4, findmin = ifelse(is.na(findmin), "", findmin))
# Show the result.
df5
group date val1 findmin
1 1 6
2 1 2 A Y
3 1 3 A
4 2 7
5 2 6 A Y
6 2 9 A
7 3 7
8 3 1 A Y
9 3 4 A
10 4 6
11 4 8 A Y
12 4 9 A
使用随机打乱行顺序的数据进行测试
# Shuffle the rows of df1: a 100% sample of its rows, then show the result.
df6 <- df1 %>% sample_frac(size = 1)
df6
group date val1
1 3 4 A
2 3 1 A
3 4 8 A
4 4 6
5 4 9 A
6 2 9 A
7 2 7
8 3 7
9 1 3 A
10 1 6
11 2 6 A
12 1 2 A
# From the shuffled rows keep the "A" rows, sort them by date and take the
# first row of each group; tag that row with findmin = "Y".
df6 <- df6 %>%
  filter(val1 == "A") %>%
  group_by(group) %>%
  arrange(date) %>%
  slice(1) %>%
  ungroup() %>%
  mutate(findmin = "Y")
# Join the flag back onto the full data, blank out the rows that did not
# match, and sort so the output is comparable with the unshuffled run.
df7 <- df1 %>%
  left_join(df6, by = c("group", "date", "val1")) %>%
  mutate(findmin = replace(findmin, is.na(findmin), "")) %>%
  arrange(group, val1, date, findmin)
df7
group date val1 findmin
1 1 6
2 1 2 A Y
3 1 3 A
4 2 7
5 2 6 A Y
6 2 9 A
7 3 7
8 3 1 A Y
9 3 4 A
10 4 6
11 4 8 A Y
12 4 9 A
另一种做法:使用 which.min 代替 arrange 和 slice(1)
# Shuffle the rows of df1: a 100% sample of its rows, then show the result.
df6 <- df1 %>% sample_frac(size = 1)
df6
# For each group keep the "A" row holding the smallest date (which.min picks
# the first such row) and tag it with findmin = "Y".
df6 <- df6 %>%
  filter(val1 == "A") %>%
  group_by(group) %>%
  slice(which.min(date)) %>%
  ungroup() %>%
  mutate(findmin = "Y")
# Join the flag back onto the full data, blank out the rows that did not
# match, and sort the output.
df7 <- df1 %>%
  left_join(df6, by = c("group", "date", "val1")) %>%
  mutate(findmin = replace(findmin, is.na(findmin), "")) %>%
  arrange(group, val1, date, findmin)
df7
group date val1 findmin
1 1 6
2 1 2 A Y
3 1 3 A
4 2 7
5 2 6 A Y
6 2 9 A
7 3 7
8 3 1 A Y
9 3 4 A
10 4 6
11 4 8 A Y
12 4 9 A
我希望使用 data.table 根据其他两个列条件创建一个新列。这是我的示例代码:
# Sample data: four groups of three rows each. Within every group the first
# row has an empty val1 and the remaining two rows carry "A".
group <- rep(c(1, 2, 3, 4), each = 3)
date <- c(6, 2, 3, 7, 6, 9, 7, 1, 4, 6, 8, 9)
val1 <- rep(c("", "A", "A"), times = 4)
# Keep both a plain data.frame and its data.table counterpart.
df1 <- data.frame(group, date, val1)
dt1 <- as.data.table(df1)
这是输出:
group date val1
1 6
1 2 A
1 3 A
2 7
2 6 A
2 9 A
3 7
3 1 A
3 4 A
4 6
4 8 A
4 9 A
我想在每个组 (1,2,3,4) 内,找出 val1 = "A" 的行中 date 的最小值,并将该行标记出来,如下所示:
group date val1 findmin
1 6
1 2 A Y
1 3 A
2 7
2 6 A Y
2 9 A
3 7
3 1 A Y
3 4 A
4 6
4 8 A Y
4 9 A
我试过了
# NOTE: this is the failing attempt quoted in the question, kept verbatim.
# The parenthesis of ifelse() closes right after its first argument, so
# "Y", "" and by = group are handed to `[` itself instead of to ifelse()
# (presumably "Y" then lands in keyby=, which would explain the error below).
# `date=` also names an argument rather than comparing with `==`.
dt1[,findmin:= ifelse(date=min(date[val1 == "A"])),"Y","", by = group]
含义是:如果 date 等于 val1 = "A" 的行中 date 的最小值,就在名为 'findmin' 的新列中填入 "Y",否则留空,并对每个组 (1,2,3,4) 分别执行此操作。我收到此错误:
Error in `[.data.table`(dt1, , `:=`(findmin, ifelse(min(date[val1 == "A"]))), :
Provide either by= or keyby= but not both
非常感谢您的帮助,谢谢!
你必须注意括号的位置,并且用 == 来检查相等性:
# Per group, flag with "Y" the row(s) whose val1 is "A" and whose date equals
# the smallest date among that group's "A" rows; every other row gets "".
# The explicit val1 == "A" test matters: without it, a non-"A" row that
# happens to share the minimum "A" date would be flagged as well.
dt1[, findmin := fifelse(val1 == "A" & date == min(date[val1 == "A"]), "Y", ""),
    by = group]
下面的代码使用 dplyr 也可以得到结果。我相信还有更优雅的方法可以做到这一点。
# Install dplyr and data.table only when they are missing, then attach them.
# requireNamespace() checks availability without attaching the package, so
# the attach happens exactly once, in the library() call, which stops with an
# error if the package still cannot be loaded.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)
# Sample data: four groups of three rows each. Within every group the first
# row has an empty val1 and the remaining two rows carry "A".
group <- rep(c(1, 2, 3, 4), each = 3)
date <- c(6, 2, 3, 7, 6, 9, 7, 1, 4, 6, 8, 9)
val1 <- rep(c("", "A", "A"), times = 4)
# Keep both a plain data.frame and its data.table counterpart.
df1 <- data.frame(group, date, val1)
dt1 <- as.data.table(df1)
# Keep only the rows whose val1 is "A".
df2 <- filter(df1, val1 == "A")
# Sort by date, take the first row of each group (its earliest "A" date),
# drop the grouping and tag that row with findmin = "Y".
df3 <- df2 %>%
  group_by(group) %>%
  arrange(date) %>%
  slice(1) %>%
  ungroup() %>%
  mutate(findmin = "Y")
# Attach the flag to the full data; rows without a match get NA in findmin.
df4 <- left_join(df1, df3, by = c("group", "date", "val1"))
# Turn the NA flags into empty strings.
df5 <- mutate(df4, findmin = ifelse(is.na(findmin), "", findmin))
# Show the result.
df5
group date val1 findmin
1 1 6
2 1 2 A Y
3 1 3 A
4 2 7
5 2 6 A Y
6 2 9 A
7 3 7
8 3 1 A Y
9 3 4 A
10 4 6
11 4 8 A Y
12 4 9 A
使用随机打乱行顺序的数据进行测试
# Shuffle the rows of df1: a 100% sample of its rows, then show the result.
df6 <- df1 %>% sample_frac(size = 1)
df6
group date val1
1 3 4 A
2 3 1 A
3 4 8 A
4 4 6
5 4 9 A
6 2 9 A
7 2 7
8 3 7
9 1 3 A
10 1 6
11 2 6 A
12 1 2 A
# From the shuffled rows keep the "A" rows, sort them by date and take the
# first row of each group; tag that row with findmin = "Y".
df6 <- df6 %>%
  filter(val1 == "A") %>%
  group_by(group) %>%
  arrange(date) %>%
  slice(1) %>%
  ungroup() %>%
  mutate(findmin = "Y")
# Join the flag back onto the full data, blank out the rows that did not
# match, and sort so the output is comparable with the unshuffled run.
df7 <- df1 %>%
  left_join(df6, by = c("group", "date", "val1")) %>%
  mutate(findmin = replace(findmin, is.na(findmin), "")) %>%
  arrange(group, val1, date, findmin)
df7
group date val1 findmin
1 1 6
2 1 2 A Y
3 1 3 A
4 2 7
5 2 6 A Y
6 2 9 A
7 3 7
8 3 1 A Y
9 3 4 A
10 4 6
11 4 8 A Y
12 4 9 A
另一种做法:使用 which.min 代替 arrange 和 slice(1)
# Shuffle the rows of df1: a 100% sample of its rows, then show the result.
df6 <- df1 %>% sample_frac(size = 1)
df6
# For each group keep the "A" row holding the smallest date (which.min picks
# the first such row) and tag it with findmin = "Y".
df6 <- df6 %>%
  filter(val1 == "A") %>%
  group_by(group) %>%
  slice(which.min(date)) %>%
  ungroup() %>%
  mutate(findmin = "Y")
# Join the flag back onto the full data, blank out the rows that did not
# match, and sort the output.
df7 <- df1 %>%
  left_join(df6, by = c("group", "date", "val1")) %>%
  mutate(findmin = replace(findmin, is.na(findmin), "")) %>%
  arrange(group, val1, date, findmin)
df7
group date val1 findmin
1 1 6
2 1 2 A Y
3 1 3 A
4 2 7
5 2 6 A Y
6 2 9 A
7 3 7
8 3 1 A Y
9 3 4 A
10 4 6
11 4 8 A Y
12 4 9 A