按时间变量的固定步长连续递增、对数据点进行线性插值的函数

Function for linear interpolation of data points based on a fixed continuous increase in the time-variable

我的数据 res 在 res$CIF 中存储了三个治疗组。基于下面的可重现示例:

> table(res$CIF)

Control     SSA    SSTR 
     35      25       5 

对于每个 res$CIF,在时间 res$time 有一个结果的估计概率 res$P。该概率的 95% 置信区间由 res$lower 和 res$upper 给出。

因此

> head(res,10)
     CIF          P time       lower     upper
 1: SSTR 0.12500000 1.00 0.032811154 0.4140186
 2: SSTR 0.31250000 2.00 0.143720822 0.5953967
 3: SSTR 0.62500000 3.00 0.402281539 0.8457738
 4: SSTR 0.81250000 4.00 0.597544067 0.9539839
 5: SSTR 0.87500000 5.00 0.672015645 0.9793250
 6:  SSA 0.02777778 0.72 0.003960129 0.1812693
 7:  SSA 0.08333333 1.00 0.027663184 0.2365298
 8:  SSA 0.11111111 1.02 0.043219272 0.2694809
 9:  SSA 0.13888889 1.08 0.060294215 0.3020083
10:  SSA 0.16666667 1.48 0.078561410 0.3338737

我需要在 res$time 当前未给出的时间点线性插值 res$P 和相应的 res$lower + res$upper

我正在寻找一个函数(或类似的解决方案——也许是 dplyr 中的解决方案?)来扩展 res,使 res$time 的范围从 0 到每个 res$CIF 的最大 res$time。res$time 应以 .01 为步长连续递增。

即,分别从 0 到 105.16、30.85 和 5.00,步长均为 .01

> tapply(res$time,res$CIF,max)
Control     SSA    SSTR 
 105.16   30.85    5.00  

预期输出示例

# Before interpolation 
> head(res[res$CIF=="SSA"],5)
   CIF          P time       lower     upper
1: SSA 0.02777778 0.72 0.003960129 0.1812693
2: SSA 0.08333333 1.00 0.027663184 0.2365298
3: SSA 0.11111111 1.02 0.043219272 0.2694809
4: SSA 0.13888889 1.08 0.060294215 0.3020083
5: SSA 0.16666667 1.48 0.078561410 0.3338737

应该给

CIF           P   time       lower        upper
SSA           0   0.00           0            0 
SSA interpolate   0.01 interpolate  interpolate
(....)
SSA  0.02777778   0.72 0.003960129    0.1812693
(....)
SSA  0.08333333   1.00 0.027663184    0.2365298
SSA interpolate   1.01 interpolate  interpolate
SSA  0.11111111   1.02 0.043219272    0.2694809
SSA interpolate   1.03 interpolate  interpolate
SSA interpolate   1.04 interpolate  interpolate
SSA interpolate   1.05 interpolate  interpolate
SSA interpolate   1.06 interpolate  interpolate
SSA interpolate   1.07 interpolate  interpolate
SSA  0.13888889   1.08 0.060294215    0.3020083

我的数据res

# Reproducible example data: 65 rows with columns CIF (treatment group
# label), P (estimated probability at `time`), time, and lower/upper (the
# confidence bounds for P), tagged with class c("data.table", "data.frame").
res <- structure(list(CIF = c("SSTR", "SSTR", "SSTR", "SSTR", "SSTR", 
"SSA", "SSA", "SSA", "SSA", "SSA", "SSA", "SSA", "SSA", "SSA", 
"SSA", "SSA", "SSA", "SSA", "SSA", "SSA", "SSA", "SSA", "SSA", 
"SSA", "SSA", "SSA", "SSA", "SSA", "SSA", "SSA", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control"), P = c(0.125, 0.3125, 0.625, 
0.8125, 0.875, 0.0277777777777778, 0.0833333333333333, 0.111111111111111, 
0.138888888888889, 0.166666666666667, 0.305555555555556, 0.361111111111111, 
0.388888888888889, 0.416666666666667, 0.444444444444445, 0.472222222222222, 
0.527777777777778, 0.555555555555556, 0.583333333333333, 0.613095238095238, 
0.642857142857143, 0.672619047619048, 0.672619047619048, 0.732142857142857, 
0.761904761904762, 0.791666666666667, 0.821428571428572, 0.858630952380953, 
0.895833333333333, 0.970238095238095, 0.025, 0.025, 0.025, 0.025, 
0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.15, 0.175, 0.2, 0.225, 
0.25, 0.275, 0.3, 0.325, 0.35, 0.375, 0.4, 0.425, 0.45, 0.478333333333333, 
0.506666666666667, 0.535, 0.565909090909091, 0.596818181818182, 
0.631590909090909, 0.666363636363636, 0.701136363636364, 0.735909090909091, 
0.770681818181818, 0.805454545454545, 0.840227272727273), time = c(1, 
2, 3, 4, 5, 0.72, 1, 1.02, 1.08, 1.48, 2, 2.76, 2.82, 2.83, 3, 
3.08, 3.57, 5.07, 5.49, 8.03, 9, 9.2, 9.25, 10.8, 11, 11.04, 
14.2, 15.05, 21.42, 30.85, 0.23, 0.26, 0.49, 0.53, 0.69, 3.15, 
3.25, 4.5, 5.12, 5.78, 6.67, 7.65, 7.79, 7.85, 9, 9.99, 11.37, 
12.68, 13.11, 15.05, 15.83, 16.89, 18.17, 22.7, 23.59, 29.6, 
32.65, 35.81, 43, 43.79, 45.37, 46.45, 46.65, 69.02, 105.16), 
    lower = c(0.0328111539708764, 0.143720822077878, 0.402281538850257, 
    0.597544067030214, 0.672015645447537, 0.00396012891352548, 
    0.0276631838805069, 0.0432192720492351, 0.0602942151317983, 
    0.0785614100173507, 0.18231512406141, 0.228150254051563, 
    0.251830579852024, 0.275990636539769, 0.300615683301302, 
    0.32569554091635, 0.377199241260733, 0.403622389357051, 0.430498943169836, 
    0.45921123839169, 0.48851369241709, 0.518429809974778, 0.518429809974778, 
    0.580243253031609, 0.612241368451204, 0.645062949439568, 
    0.678811652981003, 0.718192077960655, 0.761035083198697, 
    0.86875565093952, 0.00355981698627006, 0.00355981698627006, 
    0.00355981698627006, 0.00355981698627006, 0.00355981698627006, 
    0.0127444761396915, 0.0248237564000704, 0.0387548083597935, 
    0.0540289278928447, 0.0703518337133965, 0.0703518337133965, 
    0.0875349177801499, 0.105448804967606, 0.124000413393049, 
    0.143120415258911, 0.162755851975246, 0.182865529959695, 
    0.20341701986178, 0.22438463188234, 0.245748013164318, 0.267491158005832, 
    0.289601702577961, 0.312070423267255, 0.337084153386502, 
    0.362632734261147, 0.388713550110491, 0.417009262625303, 
    0.446060748331539, 0.478344299849033, 0.511927776545623, 
    0.546903575783177, 0.583428286459313, 0.621755981338108, 
    0.662308487815083, 0.705837515898454), upper = c(0.414018563190984, 
    0.595396721303893, 0.845773808747176, 0.953983876394769, 
    0.979324974514942, 0.181269269945454, 0.236529759895451, 
    0.269480930499581, 0.302008337476789, 0.333873667543882, 
    0.483459160112475, 0.539346001089099, 0.566541091565836, 
    0.593255777919075, 0.619498863619302, 0.645275418976181, 
    0.695431242848002, 0.719802607844794, 0.743691288858428, 
    0.769343219994671, 0.794279647503738, 0.818479990760844, 
    0.818479990760844, 0.864527983506889, 0.886263823263894, 
    0.907030011614637, 0.926703065538918, 0.951295364401731, 
    0.971948922997515, 0.997717755344036, 0.164514540888494, 
    0.164514540888494, 0.164514540888494, 0.164514540888494, 
    0.164514540888494, 0.185453879855473, 0.214782436042548, 
    0.244858945502003, 0.274592902916402, 0.303763783420835, 
    0.303763783420835, 0.332343773644891, 0.360355238540763, 
    0.387832132523894, 0.414808316400823, 0.441314057995926, 
    0.467375240079267, 0.493013472868414, 0.518246474164134, 
    0.543088485840094, 0.56755064509021, 0.591641285440807, 0.615366163389952, 
    0.642996938577391, 0.669919078034907, 0.696160621236471, 
    0.724897048548371, 0.752630120302531, 0.78395657751485, 0.813600779732339, 
    0.841596636148257, 0.867931528334935, 0.892542509012415, 
    0.91530398695896, 0.935999137248638)), row.names = c(NA, 
-65L), class = c("data.table", "data.frame"))

这是使用 tidyr 的 complete 和 zoo 的 na.approx 的方法:

library(dplyr)
library(tidyr)
library(zoo)

# Expand each CIF group onto a 0.01-step time grid starting at 0, then fill
# P, lower and upper by linear interpolation between the observed points.
res %>%
  # Round to the grid resolution so observed times compare equal to grid
  # values: seq(by = 0.01) carries floating-point error (e.g. 7 * 0.01 is not
  # identical to 0.07), which would otherwise keep an observed row from
  # matching its grid row and leave near-duplicate time points.
  mutate(time = round(time, 2)) %>%
  group_by(CIF) %>%
  complete(time = round(seq(0, max(time), by = 0.01), 2)) %>%
  # Anchor every curve at 0 when there is no observation at time 0.
  mutate(across(
    c(P, lower, upper),
    ~ ifelse(time == 0 & is.na(.x), 0, .x)
  )) %>%
  # Interpolate against time rather than row position; na.rm = FALSE keeps
  # the column length unchanged, which mutate() requires.
  mutate(across(
    c(P, lower, upper),
    ~ zoo::na.approx(.x, x = time, na.rm = FALSE)
  )) %>%
  # Drop the grouping so later steps do not silently operate per CIF.
  ungroup()

如果您想使用非线性方法,您可以尝试 spline:

# Non-linear alternative: fit a cubic spline through the observed points of
# each CIF group and evaluate it on the same 0.01-step time grid.
# NOTE(review): a spline can overshoot between knots, so interpolated
# probabilities are not guaranteed to stay monotone or within [0, 1].
res %>%
  # Round to the grid resolution so observed times compare equal to grid
  # values despite the floating-point error in seq(by = 0.01).
  mutate(time = round(time, 2)) %>%
  group_by(CIF) %>%
  complete(time = round(seq(0, max(time), by = 0.01), 2)) %>%
  # Anchor every curve at 0 when there is no observation at time 0.
  mutate(across(
    c(P, lower, upper),
    ~ ifelse(time == 0 & is.na(.x), 0, .x)
  )) %>%
  # Fit on the non-missing points only and evaluate exactly at each row's
  # time (xout), instead of relying on n equally spaced points lining up
  # with the rows.
  mutate(across(
    c(P, lower, upper),
    ~ spline(time[!is.na(.x)], .x[!is.na(.x)], xout = time)$y
  )) %>%
  # Drop the grouping so later steps do not silently operate per CIF.
  ungroup()

请注意,这种方法并非只填补 NA,而是用样条插值的结果替换所有值。