将数据框中的日期与其他数据框中的两个日期进行比较
Compare a Date in a data frame with two dates in other data frame
我一直在阅读类似的帖子,但我无法使任何解决方案适用于我的情况(可能是因为我是 R 的新手)。
我有一个包含多个参数的长数据集,其中一个参数是日期;另外还有一个数据框,其中每个日期区间(start 到 end)对应一个特定的值。我试着做了一个可重现的例子:
# Observations: one factor1 reading every ten minutes, with the timestamp
# strings parsed to POSIXct.
df <- data.frame(
  date = as.POSIXct(c(
    "2017/08/01 19:00:00", "2017/08/01 19:10:00",
    "2017/08/01 19:20:00", "2017/08/01 19:30:00",
    "2017/08/01 19:40:00", "2017/08/01 19:50:00",
    "2017/08/01 20:00:00", "2017/08/01 20:10:00"
  )),
  factor1 = c(10, 15, 11, 13, 14, 12, 16, 15)
)

# Intervals: each start/end pair carries a label in factor2.
df2 <- data.frame(
  start = as.POSIXct(c("2017/08/01 19:00:00", "2017/08/01 19:40:00")),
  end = as.POSIXct(c("2017/08/01 19:15:00", "2017/08/01 20:05:00")),
  factor2 = c("A", "B")
)
我想要的结果是这样的:
# Desired output: every row of the observations plus the label of the interval
# that contains its date; rows outside all intervals hold the string "NA".
result <- data.frame(
  date = c(
    "2017/08/01 19:00:00", "2017/08/01 19:10:00",
    "2017/08/01 19:20:00", "2017/08/01 19:30:00",
    "2017/08/01 19:40:00", "2017/08/01 19:50:00",
    "2017/08/01 20:00:00", "2017/08/01 20:10:00"
  ),
  factor1 = c(10, 15, 11, 13, 14, 12, 16, 15),
  factor2 = c("A", "A", "NA", "NA", "B", "B", "B", "NA")
)
我试过 ifelse:
# NOTE: df$date has 8 elements while df2$start / df2$end have only 2, so R
# recycles the interval bounds element-wise (row 1 vs interval 1, row 2 vs
# interval 2, row 3 vs interval 1, ...) instead of testing every date against
# every interval. df2$factor2 is recycled the same way.
ifelse(df$date >= df2$start & df$date <= df2$end,df2$factor2,"NA")
但是无法让它工作。
有什么建议吗?
我尝试在 sqldf 中使用 inner join,它似乎有效:
library(sqldf)
# Collect only the join key and the interval label for the dates that fall
# inside an interval. Selecting df.* here would make the merge() below
# duplicate factor1 as factor1.x / factor1.y.
# The range test sits in WHERE, so rows without a matching interval are
# filtered out after the join; this is why a left outer join written the same
# way returns exactly the same rows as the inner join.
df3 <- sqldf(
  "select df.date, df2.factor2
   from df inner join df2
   where df.date >= df2.start and df.date <= df2.end"
)
# Left-merge back onto df so dates outside every interval get factor2 = NA.
result <- merge(df, df3, by = "date", all.x = TRUE)
(请注意,我也尝试过使用 left outer join 而不是 inner join,但这给了我与 inner join 相同的结果......这一定与 sqldf 有关。)
这对您的样本数据同样有效:
# For every row of df, look up the label of the first interval in df2 whose
# start/end range (both bounds inclusive) contains the row's date.
result <- df
result$factor2 <- vapply(
  seq_along(df$date),
  function(i) {
    # Indices of all intervals containing this date; which() also drops NAs.
    hits <- which(df$date[i] >= df2$start & df$date[i] <= df2$end)
    if (length(hits) > 0) {
      # If intervals overlap, the first matching one wins.
      as.character(df2$factor2[hits[1]])
    } else {
      # Literal string "NA" (not a missing value), as in the requested output.
      "NA"
    }
  },
  character(1)
)
print(result)
# date factor1 factor2
#1 2017-08-01 19:00:00 10 A
#2 2017-08-01 19:10:00 15 A
#3 2017-08-01 19:20:00 11 NA
#4 2017-08-01 19:30:00 13 NA
#5 2017-08-01 19:40:00 14 B
#6 2017-08-01 19:50:00 12 B
#7 2017-08-01 20:00:00 16 B
#8 2017-08-01 20:10:00 15 NA
我一直在阅读类似的帖子,但我无法使任何解决方案适用于我的情况(可能是因为我是 R 的新手)。
我有一个包含多个参数的长数据集,其中一个参数是日期;另外还有一个数据框,其中每个日期区间(start 到 end)对应一个特定的值。我试着做了一个可重现的例子:
# Observations: one factor1 reading every ten minutes, with the timestamp
# strings parsed to POSIXct.
df <- data.frame(
  date = as.POSIXct(c(
    "2017/08/01 19:00:00", "2017/08/01 19:10:00",
    "2017/08/01 19:20:00", "2017/08/01 19:30:00",
    "2017/08/01 19:40:00", "2017/08/01 19:50:00",
    "2017/08/01 20:00:00", "2017/08/01 20:10:00"
  )),
  factor1 = c(10, 15, 11, 13, 14, 12, 16, 15)
)

# Intervals: each start/end pair carries a label in factor2.
df2 <- data.frame(
  start = as.POSIXct(c("2017/08/01 19:00:00", "2017/08/01 19:40:00")),
  end = as.POSIXct(c("2017/08/01 19:15:00", "2017/08/01 20:05:00")),
  factor2 = c("A", "B")
)
我想要的结果是这样的:
# Desired output: every row of the observations plus the label of the interval
# that contains its date; rows outside all intervals hold the string "NA".
result <- data.frame(
  date = c(
    "2017/08/01 19:00:00", "2017/08/01 19:10:00",
    "2017/08/01 19:20:00", "2017/08/01 19:30:00",
    "2017/08/01 19:40:00", "2017/08/01 19:50:00",
    "2017/08/01 20:00:00", "2017/08/01 20:10:00"
  ),
  factor1 = c(10, 15, 11, 13, 14, 12, 16, 15),
  factor2 = c("A", "A", "NA", "NA", "B", "B", "B", "NA")
)
我试过 ifelse:
# NOTE: df$date has 8 elements while df2$start / df2$end have only 2, so R
# recycles the interval bounds element-wise (row 1 vs interval 1, row 2 vs
# interval 2, row 3 vs interval 1, ...) instead of testing every date against
# every interval. df2$factor2 is recycled the same way.
ifelse(df$date >= df2$start & df$date <= df2$end,df2$factor2,"NA")
但是无法让它工作。
有什么建议吗?
我尝试在 sqldf 中使用 inner join,它似乎有效:
library(sqldf)
# Collect only the join key and the interval label for the dates that fall
# inside an interval. Selecting df.* here would make the merge() below
# duplicate factor1 as factor1.x / factor1.y.
# The range test sits in WHERE, so rows without a matching interval are
# filtered out after the join; this is why a left outer join written the same
# way returns exactly the same rows as the inner join.
df3 <- sqldf(
  "select df.date, df2.factor2
   from df inner join df2
   where df.date >= df2.start and df.date <= df2.end"
)
# Left-merge back onto df so dates outside every interval get factor2 = NA.
result <- merge(df, df3, by = "date", all.x = TRUE)
(请注意,我也尝试过使用 left outer join 而不是 inner join,但这给了我与 inner join 相同的结果......这一定与 sqldf 有关。)
这对您的样本数据同样有效:
# For every row of df, look up the label of the first interval in df2 whose
# start/end range (both bounds inclusive) contains the row's date.
result <- df
result$factor2 <- vapply(
  seq_along(df$date),
  function(i) {
    # Indices of all intervals containing this date; which() also drops NAs.
    hits <- which(df$date[i] >= df2$start & df$date[i] <= df2$end)
    if (length(hits) > 0) {
      # If intervals overlap, the first matching one wins.
      as.character(df2$factor2[hits[1]])
    } else {
      # Literal string "NA" (not a missing value), as in the requested output.
      "NA"
    }
  },
  character(1)
)
print(result)
# date factor1 factor2
#1 2017-08-01 19:00:00 10 A
#2 2017-08-01 19:10:00 15 A
#3 2017-08-01 19:20:00 11 NA
#4 2017-08-01 19:30:00 13 NA
#5 2017-08-01 19:40:00 14 B
#6 2017-08-01 19:50:00 12 B
#7 2017-08-01 20:00:00 16 B
#8 2017-08-01 20:10:00 15 NA