如何在 r 中将数据从一个数据帧推算到另一个数据帧
How to impute data from one dataframe to another in r
我有两个数据框 df1 和 df2
# Example data: `df1` holds the dated observations, `df2` lists the target
# dates whose value columns are still empty (NA) and have to be filled in.
# Every date is written as one unbroken "YYYY-MM-DD" string; a literal that
# wraps across two lines would embed a newline in the date.
df1 <- data.frame(
  ts = c(
    "2020-01-15", "2020-01-16", "2020-01-17", "2020-01-20", "2020-01-22",
    "2020-01-24", "2020-01-27", "2020-01-30", "2020-01-31"
  ),
  lla = c(12, 13, 14, 15, 16, 17, 18, 19, 20),
  llb = c(1, 2, 3, 4, 6, 5, 9, 8, 7),
  llc = c(0.6, 1.6, 2.6, 3.6, 4.6, 5.6, 6.6, 7.6, 8.6),
  lld = c(10, 11, 12, 13, 14, 15, 16, 154, 167)
)
df2 <- data.frame(
  ts = c("2020-01-17", "2020-01-24", "2020-01-31"),
  lla = NA, llb = NA, llc = NA, lld = NA
)
如果 df2$ts 中的日期能在 df1$ts 中找到匹配,就从该日期向前回溯 4 天,计算这段时间内 df1 各列的最大值,并填入 df2 的对应列。
例如:
df2$ts 的第一个值是 "2020-01-17",它与 df1$ts 匹配。回溯 4 天意味着要筛选出 df1$ts 在 2020-01-13 到 2020-01-17 之间的数据,所以我们得到
# Rows of `df1` dated from two days before the first `df2` date up to that
# date itself (both ends inclusive).
# The author applies this inside a loop that iterates over every date of
# `df2`.
local({
  obs_dates <- as.Date(df1$ts)
  window_end <- as.Date(df2[1, 1])
  window_start <- window_end - 2
  df1[obs_dates >= window_start & obs_dates <= window_end, ]
})
# The three `df1` rows obtained for the window ending on 2020-01-17.
df1 <- data.frame(
  ts = c("2020-01-15", "2020-01-16", "2020-01-17"),
  lla = c(12, 13, 14),
  llb = c(1, 2, 3),
  llc = c(0.6, 1.6, 2.6),
  lld = c(10, 11, 12)
)
所以现在我们需要求出每一列的最大值,可以用下面的代码实现:
# Column-wise maximum over the rows of `df1` dated from two days before the
# first `df2` date up to that date (inclusive).
# Only the value columns are aggregated: `apply()` would first convert the
# whole data frame, including the `ts` dates, to a character matrix, so the
# maxima would be string comparisons returned as strings.
# Returns a named numeric vector with one entry per value column.
local({
  obs_dates <- as.Date(df1$ts)
  window_end <- as.Date(df2[1, 1])
  in_window <- obs_dates >= window_end - 2 & obs_dates <= window_end
  value_cols <- setdiff(names(df1), "ts")
  vapply(df1[in_window, value_cols, drop = FALSE], max, numeric(1))
})
但我不知道如何把这些结果填入 df2 中日期相匹配的行,例如 “2020-01-17” 以及 df2 的其他日期。
一种做法是先创建一个比 ts 早 4 天的新列,再进行滚动连接(roll):
# Rolling-join approach: fill the value columns of `df2` from `df1` with
# data.table, using lubridate for the date arithmetic.
library(data.table)
library(lubridate)
# Convert the `ts` columns of both tables to Date class.
df1$ts <- as.Date(df1$ts)
df2$ts <- as.Date(df2$ts)
# Names of every `df2` column except the first one (`ts`): the columns to fill.
nm1 <- names(df2)[-1]
# The all-NA columns are logical; convert them to numeric so that numeric
# values can be assigned into them. `setDT()` turns `df2` into a data.table.
setDT(df2)[, (nm1) := lapply(.SD, as.numeric), .SDcols = nm1]
# Add `ts1`, the date four days before `ts`, to `df1` (now a data.table too).
setDT(df1)[, ts1 := ts %m-% days(4)]
# Rolling join of `df1` onto `df2`, matching `df2$ts` against `df1$ts1` with
# `roll = -Inf`, and assigning `max` of the `i.`-prefixed (`df1`) columns into
# the `nm1` columns of `df2`.
# NOTE(review): with `by = .EACHI` the expression is evaluated once per `df1`
# row, so `max` presumably sees a single value per column each time and the
# last matching row wins - confirm this yields the window maximum when the
# values are not increasing over time.
df2[df1, (nm1) := lapply(mget(paste0("i.", nm1)), max),
on = .(ts = ts1), roll = -Inf, by = .EACHI]
尝试:
library(dplyr)

# Parse the date columns so they can be compared and shifted by whole days.
df1 <- df1 %>% mutate(ts = as.Date(ts))
df2 <- df2 %>% mutate(ts = as.Date(ts))

# Column maxima of `df` over the window ending on the date in `x`.
#
# x:  the slice of `df2` for a single date; its Date column `ts` gives the
#     last day of the window.
# df: data frame with a Date column `ts` and the value columns `lla` to `lld`.
#
# Returns a data frame holding the maximum of `lla`..`lld` over the rows of
# `df` dated from three days before `x$ts` up to `x$ts` (inclusive), followed
# by a `ts` column set to `x$ts`. `summarise()` keeps only the aggregated
# columns, which is why the date is attached after it rather than before.
my_function <- function(x, df) {
  df %>%
    filter(ts >= (x$ts - 3) & ts <= x$ts) %>%
    summarise(across(.cols = lla:lld, .fns = max)) %>%
    mutate(ts = x$ts)
}

# Apply the function to each `df2` slice (split by date) and stack the rows.
lapply(split(df2, df2$ts), my_function, df = df1) %>% do.call(rbind, .)
使用 runner 包的替代方法:
library(runner)
library(dplyr)
# Make sure both date columns are of class Date.
df1$ts <- as.Date(df1$ts)
df2$ts <- as.Date(df2$ts)
# For every `df2` column except `ts`, call `max_run()` on the `df1` column of
# the same name with `k = 4`, indexed by the `df1` dates and evaluated at the
# values of the first column of `df2`.
df2 %>%
  mutate(
    across(
      !ts,
      function(unused_col) {
        max_run(
          x = df1[[cur_column()]],
          k = 4,
          idx = df1$ts,
          at = cur_data()[[1]]
        )
      }
    )
  )
#> ts lla llb llc lld
#> 1 2020-01-17 14 3 2.6 12
#> 2 2020-01-24 17 6 5.6 15
#> 3 2020-01-31 20 8 8.6 167
由 reprex package (v2.0.0) 于 2021-06-06 创建
我有两个数据框 df1 和 df2
# Example data: `df1` holds the dated observations, `df2` lists the target
# dates whose value columns are still empty (NA) and have to be filled in.
# Every date is written as one unbroken "YYYY-MM-DD" string; a literal that
# wraps across two lines would embed a newline in the date.
df1 <- data.frame(
  ts = c(
    "2020-01-15", "2020-01-16", "2020-01-17", "2020-01-20", "2020-01-22",
    "2020-01-24", "2020-01-27", "2020-01-30", "2020-01-31"
  ),
  lla = c(12, 13, 14, 15, 16, 17, 18, 19, 20),
  llb = c(1, 2, 3, 4, 6, 5, 9, 8, 7),
  llc = c(0.6, 1.6, 2.6, 3.6, 4.6, 5.6, 6.6, 7.6, 8.6),
  lld = c(10, 11, 12, 13, 14, 15, 16, 154, 167)
)
df2 <- data.frame(
  ts = c("2020-01-17", "2020-01-24", "2020-01-31"),
  lla = NA, llb = NA, llc = NA, lld = NA
)
如果 df2$ts 中的日期能在 df1$ts 中找到匹配,就从该日期向前回溯 4 天,计算这段时间内 df1 各列的最大值,并填入 df2 的对应列。
例如:
df2$ts 的第一个值是 "2020-01-17",它与 df1$ts 匹配。回溯 4 天意味着要筛选出 df1$ts 在 2020-01-13 到 2020-01-17 之间的数据,所以我们得到
# Rows of `df1` dated from two days before the first `df2` date up to that
# date itself (both ends inclusive).
# The author applies this inside a loop that iterates over every date of
# `df2`.
local({
  obs_dates <- as.Date(df1$ts)
  window_end <- as.Date(df2[1, 1])
  window_start <- window_end - 2
  df1[obs_dates >= window_start & obs_dates <= window_end, ]
})
# The three `df1` rows obtained for the window ending on 2020-01-17.
df1 <- data.frame(
  ts = c("2020-01-15", "2020-01-16", "2020-01-17"),
  lla = c(12, 13, 14),
  llb = c(1, 2, 3),
  llc = c(0.6, 1.6, 2.6),
  lld = c(10, 11, 12)
)
所以现在我们需要求出每一列的最大值,可以用下面的代码实现:
# Column-wise maximum over the rows of `df1` dated from two days before the
# first `df2` date up to that date (inclusive).
# Only the value columns are aggregated: `apply()` would first convert the
# whole data frame, including the `ts` dates, to a character matrix, so the
# maxima would be string comparisons returned as strings.
# Returns a named numeric vector with one entry per value column.
local({
  obs_dates <- as.Date(df1$ts)
  window_end <- as.Date(df2[1, 1])
  in_window <- obs_dates >= window_end - 2 & obs_dates <= window_end
  value_cols <- setdiff(names(df1), "ts")
  vapply(df1[in_window, value_cols, drop = FALSE], max, numeric(1))
})
但我不知道如何把这些结果填入 df2 中日期相匹配的行,例如 “2020-01-17” 以及 df2 的其他日期。
一种做法是先创建一个比 ts 早 4 天的新列,再进行滚动连接(roll):
# Rolling-join approach: fill the value columns of `df2` from `df1` with
# data.table, using lubridate for the date arithmetic.
library(data.table)
library(lubridate)
# Convert the `ts` columns of both tables to Date class.
df1$ts <- as.Date(df1$ts)
df2$ts <- as.Date(df2$ts)
# Names of every `df2` column except the first one (`ts`): the columns to fill.
nm1 <- names(df2)[-1]
# The all-NA columns are logical; convert them to numeric so that numeric
# values can be assigned into them. `setDT()` turns `df2` into a data.table.
setDT(df2)[, (nm1) := lapply(.SD, as.numeric), .SDcols = nm1]
# Add `ts1`, the date four days before `ts`, to `df1` (now a data.table too).
setDT(df1)[, ts1 := ts %m-% days(4)]
# Rolling join of `df1` onto `df2`, matching `df2$ts` against `df1$ts1` with
# `roll = -Inf`, and assigning `max` of the `i.`-prefixed (`df1`) columns into
# the `nm1` columns of `df2`.
# NOTE(review): with `by = .EACHI` the expression is evaluated once per `df1`
# row, so `max` presumably sees a single value per column each time and the
# last matching row wins - confirm this yields the window maximum when the
# values are not increasing over time.
df2[df1, (nm1) := lapply(mget(paste0("i.", nm1)), max),
on = .(ts = ts1), roll = -Inf, by = .EACHI]
尝试:
library(dplyr)

# Parse the date columns so they can be compared and shifted by whole days.
df1 <- df1 %>% mutate(ts = as.Date(ts))
df2 <- df2 %>% mutate(ts = as.Date(ts))

# Column maxima of `df` over the window ending on the date in `x`.
#
# x:  the slice of `df2` for a single date; its Date column `ts` gives the
#     last day of the window.
# df: data frame with a Date column `ts` and the value columns `lla` to `lld`.
#
# Returns a data frame holding the maximum of `lla`..`lld` over the rows of
# `df` dated from three days before `x$ts` up to `x$ts` (inclusive), followed
# by a `ts` column set to `x$ts`. `summarise()` keeps only the aggregated
# columns, which is why the date is attached after it rather than before.
my_function <- function(x, df) {
  df %>%
    filter(ts >= (x$ts - 3) & ts <= x$ts) %>%
    summarise(across(.cols = lla:lld, .fns = max)) %>%
    mutate(ts = x$ts)
}

# Apply the function to each `df2` slice (split by date) and stack the rows.
lapply(split(df2, df2$ts), my_function, df = df1) %>% do.call(rbind, .)
使用 runner 包的替代方法:
library(runner)
library(dplyr)
# Make sure both date columns are of class Date.
df1$ts <- as.Date(df1$ts)
df2$ts <- as.Date(df2$ts)
# For every `df2` column except `ts`, call `max_run()` on the `df1` column of
# the same name with `k = 4`, indexed by the `df1` dates and evaluated at the
# values of the first column of `df2`.
df2 %>%
  mutate(
    across(
      !ts,
      function(unused_col) {
        max_run(
          x = df1[[cur_column()]],
          k = 4,
          idx = df1$ts,
          at = cur_data()[[1]]
        )
      }
    )
  )
#> ts lla llb llc lld
#> 1 2020-01-17 14 3 2.6 12
#> 2 2020-01-24 17 6 5.6 15
#> 3 2020-01-31 20 8 8.6 167
由 reprex package (v2.0.0) 于 2021-06-06 创建