R 在值之间查找

R lookup between values

我对 R 比较陌生，正在尝试在多个数据框之间进行查找。我有 df1，它记录每辆车进入/退出车队的时间；df2 记录每辆车的维修时间段。我想创建 df3，按列标题列出一组特定的汽车，并显示它们在每个给定时间步是否可用（结果为 Yes/No 或 1/0）。

示例数据如下。df1 和 df2 中会有许多汽车，但并非所有汽车都会出现在 df3 中。

我尝试过 between 和 foverlaps，但都没有成功。我开始怀疑这是否真的能在 R 中完成，还是说我需要退回到使用大量 if 语句的做法。

df1

CarID Entry Exit
Car1  1   100
Car2  5   95

等等

df2(显示服务时间表)

CarID  ServiceType  Start  End
Car1  TypeA  10  20
Car1  TypeA  30  40
Car1  TypeB  45  46
Car2  TypeA  20  30

等等

df3（想要创建并填充的表）

Date  Car1  Car2 
1
2

使用data.table:

library(data.table)

# Build df3: one row per time step (Date) and one column per car listed in
# df1, holding "Yes" (in the fleet and not being serviced), "No" (being
# serviced) or NA (car not in the fleet at that time step).
#
# df1 and df2 are only read, never overwritten; as.data.table() also lets the
# data.table `[` syntax below work when they arrive as plain data.frames.
fleet <- as.data.table(df1)
service <- as.data.table(df2)

# Expand each fleet interval into one row per time step. Grouping by all three
# columns keeps Entry/Exit scalar inside j, so a car with several entry/exit
# periods (or a duplicated row) cannot hand seq() a vector.
fleet <- fleet[
  , .(Date = seq(Entry, Exit, by = 1)),
  by = .(CarID, Entry, Exit)
]
fleet <- unique(fleet[, .(CarID, Date)])

# Same expansion for the service intervals. Only cars present in df1 are kept,
# because df3 has one column per fleet car.
service <- service[
  , .(Date = seq(Start, End, by = 1)),
  by = .(CarID, Start, End)
]
service <- unique(service[CarID %in% fleet$CarID, .(CarID, Date)])

# Work in long form: every (car, time step) seen in either table starts as
# "Yes", then an update join flips the serviced time steps to "No". This avoids
# the ".x"/".y" merge suffixes, which only exist for cars that appear in both
# tables, and the positional column clean-up that depended on them.
status <- unique(rbind(fleet, service))
status[, Available := "Yes"]
status[service, on = .(CarID, Date), Available := "No"]

df3 <- dcast(status, Date ~ CarID, value.var = "Available")

# Keep the car columns in the order the cars first appear in df1.
setcolorder(df3, c("Date", as.character(unique(fleet$CarID))))

结果为:

> dput(df3)
structure(list(Date = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 
93, 94, 95, 96, 97, 98, 99, 100), Car1 = c("Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", 
"Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes"), Car2 = c(NA, NA, NA, NA, "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", NA, NA, NA, NA, NA)), sorted = "Date", class = c("data.table", 
"data.frame"), row.names = c(NA, -100L), .internal.selfref = <pointer: 0x0000017d31e21ef0>)

还有一个选项:

library(data.table)
# Convert both inputs to data.table by reference.
setDT(df1)
setDT(df2)

# One row per car per time step between its Entry and Exit.
df3 <- df1[, list(Date = seq(Entry, Exit)), by = "CarID"]

# Non-equi update join: rows whose Date falls inside a service window of the
# same car receive that window's ServiceType; all other rows stay NA.
df3[df2, on = .(CarID, Date >= Start, Date <= End), avail := ServiceType]

# A car is available exactly where no service window matched.
df3[, avail := is.na(avail)]

# Print the result.
df3[]
#       CarID  Date  avail
#      <char> <int> <lgcl>
#   1:   Car1     1   TRUE
#   2:   Car1     2   TRUE
#   3:   Car1     3   TRUE
#   4:   Car1     4   TRUE
#   5:   Car1     5   TRUE
#   6:   Car1     6   TRUE
#   7:   Car1     7   TRUE
#   8:   Car1     8   TRUE
#   9:   Car1     9   TRUE
#  10:   Car1    10  FALSE
#  ---                    
# 182:   Car2    86   TRUE
# 183:   Car2    87   TRUE
# 184:   Car2    88   TRUE
# 185:   Car2    89   TRUE
# 186:   Car2    90   TRUE
# 187:   Car2    91   TRUE
# 188:   Car2    92   TRUE
# 189:   Car2    93   TRUE
# 190:   Car2    94   TRUE
# 191:   Car2    95   TRUE

最后转换为宽格式：

# Reshape long -> wide: one row per Date, one logical column per CarID.
dcast(data = df3, formula = Date ~ CarID, value.var = "avail")
#     Date  Car1  Car2
# 1      1  TRUE    NA
# 2      2  TRUE    NA
# 3      3  TRUE    NA
# 4      4  TRUE    NA
# 5      5  TRUE  TRUE
# 6      6  TRUE  TRUE
# 7      7  TRUE  TRUE
# 8      8  TRUE  TRUE
# 9      9  TRUE  TRUE
# 10    10 FALSE  TRUE
# 11    11 FALSE  TRUE
### ...

如果您希望把 NA 改为 FALSE（因为在这些时间步车辆根本不在车队中，也就是不可用），请使用 dcast(..., fill = FALSE)。