加入两个数据框,其中一个数据框的日期落入另一个数据框的时间间隔。 R
Joining two dataframes where a date in one falls into an interval in another. R
我正在尝试根据第一个数据框的日期是否适合第二个数据框的时间间隔,用另一个数据框的值填充一个数据框。
现在,我正在使用嵌套的 for 循环执行此操作,但不用说,这种方法非常慢。
这是一些示例数据和我的嵌套 for 循环:
library(lubridate)

# Reference table: one poll value per period, keyed by the period's start date.
periods <- structure(list(week = structure(c(16475, 16489, 16531, 16545,16559, 16573, 16587, 16615, 16629, 16643, 16657, 16671, 16685,
16699, 16727, 16741, 16755, 16769, 16783, 16797, 16811, 16825
), class = "Date"), poll = c(6.5, 4, 12, 11.5, 13, 9.5, 7, 8,
4.5, 4.5, 7.5, 4.8, 6.33333333333333, 7.5, 11.125, 13, 12, 12.8571428571429,
10.5, 13, 11, 4)), .Names = c("week", "poll"), row.names = 82:103, class = "data.frame")

# Build the two-week interval in its own column `int` (the column the loop
# below reads) instead of overwriting `week`. `week` is already a Date, so no
# re-parsing is needed.
periods$int <- interval(periods$week, periods$week + weeks(2))

# Table to fill: `poll` should receive the value of the period containing `week`.
weeks <- structure(list(week = structure(c(16720, 16622, 16776, 16720,
16734, 16741), class = "Date"), poll = c(NA, NA, NA, NA, NA,
13)), .Names = c("week", "poll"), row.names = c(NA, 6L), class = "data.frame")

for (i in seq_along(weeks$week)) {
  x <- weeks$week[i]
  # A missing date cannot fall into any interval; leave its poll untouched.
  if (is.na(x)) {
    next
  }
  for (j in seq_along(periods$int)) {
    if (x %within% periods$int[j]) {
      # Assign to row i only (not the whole column). Interval endpoints are
      # inclusive, so a date on a shared boundary matches two consecutive
      # periods and the later one wins because it is assigned last.
      weeks$poll[i] <- periods$poll[j]
    }
  }
}
我假设有一个应用函数可以加快速度,但我似乎无法让它工作...非常感谢所有帮助!
我即兴创作了一个适用于我的案例的解决方案,所以我将 post 放在这里以防其他人发现自己处于类似的困境中。
library(lubridate)
library(data.table)

# Reference table: one poll value per period, keyed by the period's start date.
periods <- structure(list(week = structure(c(16475, 16489, 16531, 16545,16559, 16573, 16587, 16615, 16629, 16643, 16657, 16671, 16685,
16699, 16727, 16741, 16755, 16769, 16783, 16797, 16811, 16825
), class = "Date"), poll = c(6.5, 4, 12, 11.5, 13, 9.5, 7, 8,
4.5, 4.5, 7.5, 4.8, 6.33333333333333, 7.5, 11.125, 13, 12, 12.8571428571429,
10.5, 13, 11, 4)), .Names = c("week", "poll"), row.names = 82:103, class = "data.frame")

# End date of each two-week period. `week` is already a Date, so the sum is
# used directly, without re-parsing.
periods$week2 <- periods$week + weeks(2)

# Table of single dates to match against the periods; it must be assigned to
# `weeks` because the statements below refer to it by that name.
weeks <- structure(list(week = structure(c(16720, 16622, 16776, 16720,
16734, 16741), class = "Date"), poll = c(NA, NA, NA, NA, NA,
NA)), .Names = c("week", "poll"), row.names = c(NA, 6L), class = "data.frame")

# foverlaps() works on start/end column pairs, so each single date is turned
# into a zero-length interval (start == end).
weeks$week2 <- weeks$week

setDT(periods)
setDT(weeks)
setkey(periods, week, week2)
setkey(weeks, week, week2)

# Overlap join: pairs each period with the dates that fall inside it.
merged <- foverlaps(periods, weeks, by.x = c("week", "week2"))
它不是很漂亮,但它适用于我的情况。
我正在尝试根据第一个数据框的日期是否适合第二个数据框的时间间隔,用另一个数据框的值填充一个数据框。
现在,我正在使用嵌套的 for 循环执行此操作,但不用说,这种方法非常慢。
这是一些示例数据和我的嵌套 for 循环:
library(lubridate)

# Reference table: one poll value per period, keyed by the period's start date.
periods <- structure(list(week = structure(c(16475, 16489, 16531, 16545,16559, 16573, 16587, 16615, 16629, 16643, 16657, 16671, 16685,
16699, 16727, 16741, 16755, 16769, 16783, 16797, 16811, 16825
), class = "Date"), poll = c(6.5, 4, 12, 11.5, 13, 9.5, 7, 8,
4.5, 4.5, 7.5, 4.8, 6.33333333333333, 7.5, 11.125, 13, 12, 12.8571428571429,
10.5, 13, 11, 4)), .Names = c("week", "poll"), row.names = 82:103, class = "data.frame")

# Build the two-week interval in its own column `int` (the column the loop
# below reads) instead of overwriting `week`. `week` is already a Date, so no
# re-parsing is needed.
periods$int <- interval(periods$week, periods$week + weeks(2))

# Table to fill: `poll` should receive the value of the period containing `week`.
weeks <- structure(list(week = structure(c(16720, 16622, 16776, 16720,
16734, 16741), class = "Date"), poll = c(NA, NA, NA, NA, NA,
13)), .Names = c("week", "poll"), row.names = c(NA, 6L), class = "data.frame")

for (i in seq_along(weeks$week)) {
  x <- weeks$week[i]
  # A missing date cannot fall into any interval; leave its poll untouched.
  if (is.na(x)) {
    next
  }
  for (j in seq_along(periods$int)) {
    if (x %within% periods$int[j]) {
      # Assign to row i only (not the whole column). Interval endpoints are
      # inclusive, so a date on a shared boundary matches two consecutive
      # periods and the later one wins because it is assigned last.
      weeks$poll[i] <- periods$poll[j]
    }
  }
}
我假设有一个应用函数可以加快速度,但我似乎无法让它工作...非常感谢所有帮助!
我即兴创作了一个适用于我的案例的解决方案,所以我将 post 放在这里以防其他人发现自己处于类似的困境中。
library(lubridate)
library(data.table)

# Reference table: one poll value per period, keyed by the period's start date.
periods <- structure(list(week = structure(c(16475, 16489, 16531, 16545,16559, 16573, 16587, 16615, 16629, 16643, 16657, 16671, 16685,
16699, 16727, 16741, 16755, 16769, 16783, 16797, 16811, 16825
), class = "Date"), poll = c(6.5, 4, 12, 11.5, 13, 9.5, 7, 8,
4.5, 4.5, 7.5, 4.8, 6.33333333333333, 7.5, 11.125, 13, 12, 12.8571428571429,
10.5, 13, 11, 4)), .Names = c("week", "poll"), row.names = 82:103, class = "data.frame")

# End date of each two-week period. `week` is already a Date, so the sum is
# used directly, without re-parsing.
periods$week2 <- periods$week + weeks(2)

# Table of single dates to match against the periods; it must be assigned to
# `weeks` because the statements below refer to it by that name.
weeks <- structure(list(week = structure(c(16720, 16622, 16776, 16720,
16734, 16741), class = "Date"), poll = c(NA, NA, NA, NA, NA,
NA)), .Names = c("week", "poll"), row.names = c(NA, 6L), class = "data.frame")

# foverlaps() works on start/end column pairs, so each single date is turned
# into a zero-length interval (start == end).
weeks$week2 <- weeks$week

setDT(periods)
setDT(weeks)
setkey(periods, week, week2)
setkey(weeks, week, week2)

# Overlap join: pairs each period with the dates that fall inside it.
merged <- foverlaps(periods, weeks, by.x = c("week", "week2"))
它不是很漂亮,但它适用于我的情况。