在现有数据框中添加向量作为新列

Adding a vector as new columns in existing data frame

示例数据

df <- data.frame(location = rep(1:1000, each = 36), 
                 year = rep(1980:2015,times = 1000),
                 mu = runif(min =  36.5, max = 43.2, 1000*36),
                 lambda = runif(min =  4.5, max = 4.8, 1000*36))

此数据包含 1000 个地点和 36 年,有两个变量 mu 和 lambda

对于每个位置 X 年组合,我有一个函数需要 lambda 和 mu 的值并生成大小为 12 的向量。示例:

library(grofit)    
dat <- df[df$location == 1 & df$year == 1980,]  
y <- round(gompertz(1:12,100,dat$mu,dat$lambda), digits = 2)
y

  [1]  0.00  0.00  0.00  0.72 18.60 56.37 82.26 93.56 97.76 99.23
  [11] 99.74 99.91

如果我想将 y 作为列添加到 dat

  new.col <- 5:16 
  dat[new.col] <- y
  dat

  location year       mu   lambda V5 V6 V7   V8   V9   V10   V11
     1     1980 39.60263 4.554095  0  0  0 0.72 18.6 56.37 82.26
  V12   V13   V14   V15   V16
  1 93.56 97.76 99.23 99.74 99.91

如您所见,我在 dat 中将 y 作为 V5 至 V16 列附加。我想对 df 中的所有位置和年份组合重复此操作。我希望这是清楚的。

df %>% group_by(location year) %>% mutate(?? how to I add new columns for y??)

使用您生成的数据,无需 summarise() 使用 dplyr。每条记录都是独一无二的。所以这似乎更像是一个使用 apply().

的地方

有很多方法可以遍历它;我刚刚创建了十二个语句。我们将 df 的 mu,lamda 列传递给 apply 函数,然后在每 36000 行中使用您的函数将该向量的 12 部分抓取到 12 个新变量中 y1:y12.

df$y1 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[1])
df$y2 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[2])
df$y3 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[3])
df$y4 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[4])
df$y5 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[5])
df$y6 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[6])
df$y7 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[7])
df$y8 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[8])
df$y9 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[9])
df$y10 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[10])
df$y11 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[11])
df$y12 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[12])
head(df)

  location year       mu   lambda y1 y2 y3   y4    y5    y6    y7    y8    y9   y10   y11   y12
1        1 1980 38.70790 4.531560  0  0  0 0.86 19.00 56.00 81.67 93.18 97.56 99.14 99.70 99.89
2        1 1981 42.64717 4.765444  0  0  0 0.14 12.60 52.22 81.56 93.81 98.01 99.37 99.80 99.94
3        1 1982 39.19041 4.527792  0  0  0 0.85 19.33 56.75 82.27 93.49 97.71 99.20 99.73 99.91
4        1 1983 37.50859 4.565435  0  0  0 0.79 17.46 53.28 79.68 92.13 97.09 98.94 99.62 99.86
5        1 1984 36.71666 4.779357  0  0  0 0.27 11.29 44.76 74.36 89.65 96.05 98.53 99.45 99.80
6        1 1985 42.11325 4.783322  0  0  0 0.13 11.99 50.91 80.66 93.39 97.85 99.31 99.78 99.93

注意:在 dplyr 内,您还可以执行以下操作:

df <- df %>% rowwise() %>% mutate(y1 = round(gompertz(1:12,100,mu,lambda), digits = 2)[1],
                                  y2 = round(gompertz(1:12,100,mu,lambda), digits = 2)[2],
                                  y3 = round(gompertz(1:12,100,mu,lambda), digits = 2)[3],
                                  y4 = round(gompertz(1:12,100,mu,lambda), digits = 2)[4],
                                  y5 = round(gompertz(1:12,100,mu,lambda), digits = 2)[5])

重复6-12,得到同样的结果。

您可以使用 lapply():

library(grofit)    
df2 <- do.call(rbind, lapply(1:nrow(df), 
                             function(x) round(gompertz(1:12, 100, df[x, 3], df[x, 4]), 
                                               digits = 2)))
df3 <- cbind(df, df2)

结果:

> head(df3)
  location year       mu   lambda 1 2 3    4     5     6     7     8     9    10    11    12
1        1 1980 43.04565 4.536717 0 0 0 0.61 20.58 61.23 85.88 95.39 98.54 99.55 99.86 99.96
2        1 1981 39.00524 4.505235 0 0 0 0.96 20.02 57.28 82.45 93.53 97.71 99.20 99.72 99.90
3        1 1982 41.60206 4.619627 0 0 0 0.42 17.07 56.52 83.18 94.23 98.10 99.38 99.80 99.94
4        1 1983 42.01069 4.689058 0 0 0 0.26 14.87 54.43 82.35 93.99 98.04 99.37 99.80 99.94
5        1 1984 40.34275 4.692595 0 0 0 0.30 14.36 52.30 80.54 93.03 97.61 99.20 99.73 99.91
6        1 1985 41.13246 4.641404 0 0 0 0.38 16.20 55.15 82.32 93.84 97.94 99.32 99.78 99.93

数据:

set.seed(47)  # for sake of reproducibility
df <- data.frame(location = rep(1:1000, each = 36), 
                 year = rep(1980:2015, times = 1000),
                 mu = runif(min =  36.5, max = 43.2, 1000 * 36),
                 lambda = runif(min =  4.5, max = 4.8, 1000 * 36))

这是一个 tidyverse 解决方案:

df <- head(df) # we'll work on a sample

library(tidyverse)
df %>%
  mutate(y = map2(mu,lambda,gompertz,time= 1:12,A = 100),
         y = map(y,. %>% round(2) %>% t %>% as_tibble)) %>% # we reformat the vectors as one line tibbles for smooth unnesting
  unnest %>%
  rename_at(5:16,~paste0("y",1:12))
#   location year       mu   lambda y1 y2 y3   y4    y5    y6    y7    y8    y9   y10   y11   y12
# 1        1 1980 38.52133 4.793232  0  0  0 0.20 11.20 46.38 76.37 90.97 96.73 98.84 99.59 99.86
# 2        1 1981 41.05032 4.668713  0  0  0 0.32 15.29 54.04 81.74 93.61 97.86 99.29 99.77 99.92
# 3        1 1982 36.76366 4.687794  0  0  0 0.45 13.67 48.07 76.37 90.55 96.41 98.66 99.51 99.82
# 4        1 1983 42.47994 4.766380  0  0  0 0.14 12.55 51.99 81.37 93.71 97.97 99.36 99.80 99.94
# 5        1 1984 36.58161 4.510503  0  0  0 1.09 18.81 53.90 79.56 91.89 96.92 98.85 99.57 99.84
# 6        1 1985 41.77695 4.705588  0  0  0 0.23 14.29 53.52 81.81 93.75 97.95 99.34 99.79 99.93

和一个应该 运行 更快的基础版本:

new_df <- cbind(df,round(t(mapply(gompertz,  df$mu, df$lambda,MoreArgs = list(time= 1:12, A = 100))),2))
names(new_df)[5:16] <- paste0("y",1:12)
#   location year       mu   lambda y1 y2 y3   y4    y5    y6    y7    y8    y9   y10   y11   y12
# 1        1 1980 38.52133 4.793232  0  0  0 0.20 11.20 46.38 76.37 90.97 96.73 98.84 99.59 99.86
# 2        1 1981 41.05032 4.668713  0  0  0 0.32 15.29 54.04 81.74 93.61 97.86 99.29 99.77 99.92
# 3        1 1982 36.76366 4.687794  0  0  0 0.45 13.67 48.07 76.37 90.55 96.41 98.66 99.51 99.82
# 4        1 1983 42.47994 4.766380  0  0  0 0.14 12.55 51.99 81.37 93.71 97.97 99.36 99.80 99.94
# 5        1 1984 36.58161 4.510503  0  0  0 1.09 18.81 53.90 79.56 91.89 96.92 98.85 99.57 99.84
# 6        1 1985 41.77695 4.705588  0  0  0 0.23 14.29 53.52 81.81 93.75 97.95 99.34 99.79 99.93

mapply 的一个不经常使用的替代方法是 Vectorize,在这种情况下,我认为它的使用是合理的,因为这个函数看起来确实应该首先被矢量化。

gompertz2 <- Vectorize(gompertz,c("mu","lambda"))
new_df <- cbind(df,round(t(gompertz2(1:12, 100, df$mu,df$lambda)),2))
names(new_df)[5:16] <- paste0("y",1:12)

# same output