在现有数据框中添加向量作为新列
Adding a vector as new columns in existing data frame
示例数据
# Simulated panel: 1000 locations x 36 years (1980-2015), one row per pair,
# with uniformly drawn `mu` and `lambda` values.
df <- data.frame(
  location = rep(1:1000, each = 36),
  year     = rep(1980:2015, times = 1000),
  mu       = runif(n = 1000 * 36, min = 36.5, max = 43.2),
  lambda   = runif(n = 1000 * 36, min = 4.5, max = 4.8)
)
此数据包含 1000 个地点和 36 年,有两个变量
mu 和 lambda
对于每个位置 × 年份的组合,我有一个函数,它以 lambda 和 mu 的值为输入,
并生成一个长度为 12 的向量。示例:
library(grofit)

# Single row for location 1 in 1980.
dat <- df[df$year == 1980 & df$location == 1, ]
# Evaluate gompertz() at time points 1..12 with A = 100 and this row's
# mu / lambda, rounded to two decimals.
y <- round(
  gompertz(time = 1:12, A = 100, mu = dat$mu, lambda = dat$lambda),
  2
)
y
[1] 0.00 0.00 0.00 0.72 18.60 56.37 82.26 93.56 97.76 99.23
[11] 99.74 99.91
如果我想将 y
作为列添加到 dat
# Target positions 5..16: the 12 columns that follow location, year, mu, lambda.
new.col <- 5:16
# Assign the 12 values of y to those positions; `dat` holds a single row here,
# so each value lands in its own new column (printed as V5..V16 below).
dat[new.col] <- y
# Print the widened row.
dat
location year mu lambda V5 V6 V7 V8 V9 V10 V11
1 1980 39.60263 4.554095 0 0 0 0.72 18.6 56.37 82.26
V12 V13 V14 V15 V16
1 93.56 97.76 99.23 99.74 99.91
如您所见,我把 y 作为 V5 至 V16 列附加到了 dat 中。
我想对 df 中所有的位置和年份组合重复此操作。希望我说清楚了。
df %>% group_by(location, year) %>% mutate(?? how do I add new columns for y ??)
对于您生成的数据,无需使用 dplyr 的 summarise():每条记录都是唯一的,
所以这里似乎更适合使用 apply()。
遍历的方法有很多;这里对每一行只计算一次曲线。我们将 df
的 mu、lambda 两列传给 apply 函数,对 36000 行中的每一行调用您的函数,再把所得向量的 12 个元素一次性写入 12 个新变量 y1 至 y12。
# Evaluate the curve once per row instead of once per output column: apply()
# hands each row's (mu, lambda) pair to gompertz() and returns a
# 12 x nrow(df) matrix, which is transposed to one row per record and stored
# as columns y1..y12. Columns are selected by name rather than by position.
df[paste0("y", 1:12)] <- t(apply(
  df[, c("mu", "lambda")], 1,
  function(x) round(gompertz(1:12, 100, x[["mu"]], x[["lambda"]]), digits = 2)
))
head(df)
location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
1 1 1980 38.70790 4.531560 0 0 0 0.86 19.00 56.00 81.67 93.18 97.56 99.14 99.70 99.89
2 1 1981 42.64717 4.765444 0 0 0 0.14 12.60 52.22 81.56 93.81 98.01 99.37 99.80 99.94
3 1 1982 39.19041 4.527792 0 0 0 0.85 19.33 56.75 82.27 93.49 97.71 99.20 99.73 99.91
4 1 1983 37.50859 4.565435 0 0 0 0.79 17.46 53.28 79.68 92.13 97.09 98.94 99.62 99.86
5 1 1984 36.71666 4.779357 0 0 0 0.27 11.29 44.76 74.36 89.65 96.05 98.53 99.45 99.80
6 1 1985 42.11325 4.783322 0 0 0 0.13 11.99 50.91 80.66 93.39 97.85 99.31 99.78 99.93
注意:在 dplyr
内,您还可以执行以下操作:
# Row-by-row evaluation with dplyr: each yK is the K-th point of the rounded
# 12-point curve computed from that row's mu and lambda.
df <- df %>%
  rowwise() %>%
  mutate(
    y1 = round(gompertz(1:12, 100, mu, lambda), 2)[1],
    y2 = round(gompertz(1:12, 100, mu, lambda), 2)[2],
    y3 = round(gompertz(1:12, 100, mu, lambda), 2)[3],
    y4 = round(gompertz(1:12, 100, mu, lambda), 2)[4],
    y5 = round(gompertz(1:12, 100, mu, lambda), 2)[5]
  )
对 y6 至 y12 重复同样的写法,即可得到相同的结果。
您可以使用 lapply()
:
library(grofit)
# One rounded 12-point curve per row of df, stacked into an nrow(df) x 12
# matrix. seq_len() replaces 1:nrow(df) so a zero-row df produces no
# iterations instead of looping over c(1, 0); mu / lambda are read by name
# rather than by column position.
df2 <- do.call(rbind, lapply(seq_len(nrow(df)), function(i) {
  round(gompertz(1:12, 100, df$mu[i], df$lambda[i]), digits = 2)
}))
# Append the curve columns to the right of the original columns.
df3 <- cbind(df, df2)
结果:
> head(df3)
location year mu lambda 1 2 3 4 5 6 7 8 9 10 11 12
1 1 1980 43.04565 4.536717 0 0 0 0.61 20.58 61.23 85.88 95.39 98.54 99.55 99.86 99.96
2 1 1981 39.00524 4.505235 0 0 0 0.96 20.02 57.28 82.45 93.53 97.71 99.20 99.72 99.90
3 1 1982 41.60206 4.619627 0 0 0 0.42 17.07 56.52 83.18 94.23 98.10 99.38 99.80 99.94
4 1 1983 42.01069 4.689058 0 0 0 0.26 14.87 54.43 82.35 93.99 98.04 99.37 99.80 99.94
5 1 1984 40.34275 4.692595 0 0 0 0.30 14.36 52.30 80.54 93.03 97.61 99.20 99.73 99.91
6 1 1985 41.13246 4.641404 0 0 0 0.38 16.20 55.15 82.32 93.84 97.94 99.32 99.78 99.93
数据:
# Fixed seed so the random draws below are reproducible.
set.seed(47)
# 1000 locations x 36 years (1980-2015); `mu` is drawn before `lambda`.
df <- data.frame(
  location = rep(1:1000, each = 36),
  year     = rep(1980:2015, times = 1000),
  mu       = runif(n = 1000 * 36, min = 36.5, max = 43.2),
  lambda   = runif(n = 1000 * 36, min = 4.5, max = 4.8)
)
这是一个 tidyverse
解决方案:
df <- head(df) # we'll work on a sample
library(tidyverse)
# Build a list-column holding one rounded 12-point curve per row, name its
# elements y1..y12, then spread it into 12 columns. This avoids the deprecated
# argument-less unnest(), the superseded rename_at(), and as_tibble() on a
# matrix without column names.
df %>%
  mutate(
    y = map2(mu, lambda, gompertz, time = 1:12, A = 100),
    y = map(y, function(v) setNames(round(v, 2), paste0("y", 1:12)))
  ) %>%
  unnest_wider(y)
# location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
# 1 1 1980 38.52133 4.793232 0 0 0 0.20 11.20 46.38 76.37 90.97 96.73 98.84 99.59 99.86
# 2 1 1981 41.05032 4.668713 0 0 0 0.32 15.29 54.04 81.74 93.61 97.86 99.29 99.77 99.92
# 3 1 1982 36.76366 4.687794 0 0 0 0.45 13.67 48.07 76.37 90.55 96.41 98.66 99.51 99.82
# 4 1 1983 42.47994 4.766380 0 0 0 0.14 12.55 51.99 81.37 93.71 97.97 99.36 99.80 99.94
# 5 1 1984 36.58161 4.510503 0 0 0 1.09 18.81 53.90 79.56 91.89 96.92 98.85 99.57 99.84
# 6 1 1985 41.77695 4.705588 0 0 0 0.23 14.29 53.52 81.81 93.75 97.95 99.34 99.79 99.93
以及一个运行起来应该更快的 base R 版本:
# mapply() evaluates gompertz() for every (mu, lambda) pair with fixed
# time = 1:12 and A = 100; the result is rounded, transposed so each input row
# becomes one output row, and bound to the right of df.
new_df <- cbind(
  df,
  t(round(
    mapply(gompertz, df$mu, df$lambda, MoreArgs = list(time = 1:12, A = 100)),
    digits = 2
  ))
)
# Name the 12 appended columns y1..y12.
names(new_df)[5:16] <- sprintf("y%d", 1:12)
# location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
# 1 1 1980 38.52133 4.793232 0 0 0 0.20 11.20 46.38 76.37 90.97 96.73 98.84 99.59 99.86
# 2 1 1981 41.05032 4.668713 0 0 0 0.32 15.29 54.04 81.74 93.61 97.86 99.29 99.77 99.92
# 3 1 1982 36.76366 4.687794 0 0 0 0.45 13.67 48.07 76.37 90.55 96.41 98.66 99.51 99.82
# 4 1 1983 42.47994 4.766380 0 0 0 0.14 12.55 51.99 81.37 93.71 97.97 99.36 99.80 99.94
# 5 1 1984 36.58161 4.510503 0 0 0 1.09 18.81 53.90 79.56 91.89 96.92 98.85 99.57 99.84
# 6 1 1985 41.77695 4.705588 0 0 0 0.23 14.29 53.52 81.81 93.75 97.95 99.34 99.79 99.93
除 mapply 之外,还有一个不常用的替代方法是 Vectorize;在这种情况下,我认为使用它是合理的,因为这个函数看起来本来就应该是向量化的。
# Wrap gompertz() so it is vectorized over its `mu` and `lambda` arguments.
gompertz2 <- Vectorize(gompertz, vectorize.args = c("mu", "lambda"))
# Evaluate all rows at once, round, transpose to one row per record, and bind
# the result to the right of df.
new_df <- cbind(
  df,
  t(round(
    gompertz2(time = 1:12, A = 100, mu = df$mu, lambda = df$lambda),
    digits = 2
  ))
)
# Name the 12 appended columns y1..y12.
names(new_df)[5:16] <- sprintf("y%d", 1:12)
# same output
示例数据
# Simulated panel: 1000 locations x 36 years (1980-2015), one row per pair,
# with uniformly drawn `mu` and `lambda` values.
df <- data.frame(
  location = rep(1:1000, each = 36),
  year     = rep(1980:2015, times = 1000),
  mu       = runif(n = 1000 * 36, min = 36.5, max = 43.2),
  lambda   = runif(n = 1000 * 36, min = 4.5, max = 4.8)
)
此数据包含 1000 个地点和 36 年,有两个变量 mu 和 lambda
对于每个位置 × 年份的组合,我有一个函数,它以 lambda 和 mu 的值为输入,并生成一个长度为 12 的向量。示例:
library(grofit)

# Single row for location 1 in 1980.
dat <- df[df$year == 1980 & df$location == 1, ]
# Evaluate gompertz() at time points 1..12 with A = 100 and this row's
# mu / lambda, rounded to two decimals.
y <- round(
  gompertz(time = 1:12, A = 100, mu = dat$mu, lambda = dat$lambda),
  2
)
y
[1] 0.00 0.00 0.00 0.72 18.60 56.37 82.26 93.56 97.76 99.23
[11] 99.74 99.91
如果我想将 y
作为列添加到 dat
# Target positions 5..16: the 12 columns that follow location, year, mu, lambda.
new.col <- 5:16
# Assign the 12 values of y to those positions; `dat` holds a single row here,
# so each value lands in its own new column (printed as V5..V16 below).
dat[new.col] <- y
# Print the widened row.
dat
location year mu lambda V5 V6 V7 V8 V9 V10 V11
1 1980 39.60263 4.554095 0 0 0 0.72 18.6 56.37 82.26
V12 V13 V14 V15 V16
1 93.56 97.76 99.23 99.74 99.91
如您所见,我把 y 作为 V5 至 V16 列附加到了 dat 中。
我想对 df 中所有的位置和年份组合重复此操作。希望我说清楚了。
df %>% group_by(location, year) %>% mutate(?? how do I add new columns for y ??)
对于您生成的数据,无需使用 dplyr 的 summarise():每条记录都是唯一的,
所以这里似乎更适合使用 apply()。
遍历的方法有很多;这里对每一行只计算一次曲线。我们将 df
的 mu、lambda 两列传给 apply 函数,对 36000 行中的每一行调用您的函数,再把所得向量的 12 个元素一次性写入 12 个新变量 y1 至 y12。
# Evaluate the curve once per row instead of once per output column: apply()
# hands each row's (mu, lambda) pair to gompertz() and returns a
# 12 x nrow(df) matrix, which is transposed to one row per record and stored
# as columns y1..y12. Columns are selected by name rather than by position.
df[paste0("y", 1:12)] <- t(apply(
  df[, c("mu", "lambda")], 1,
  function(x) round(gompertz(1:12, 100, x[["mu"]], x[["lambda"]]), digits = 2)
))
head(df)
location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
1 1 1980 38.70790 4.531560 0 0 0 0.86 19.00 56.00 81.67 93.18 97.56 99.14 99.70 99.89
2 1 1981 42.64717 4.765444 0 0 0 0.14 12.60 52.22 81.56 93.81 98.01 99.37 99.80 99.94
3 1 1982 39.19041 4.527792 0 0 0 0.85 19.33 56.75 82.27 93.49 97.71 99.20 99.73 99.91
4 1 1983 37.50859 4.565435 0 0 0 0.79 17.46 53.28 79.68 92.13 97.09 98.94 99.62 99.86
5 1 1984 36.71666 4.779357 0 0 0 0.27 11.29 44.76 74.36 89.65 96.05 98.53 99.45 99.80
6 1 1985 42.11325 4.783322 0 0 0 0.13 11.99 50.91 80.66 93.39 97.85 99.31 99.78 99.93
注意:在 dplyr
内,您还可以执行以下操作:
# Row-by-row evaluation with dplyr: each yK is the K-th point of the rounded
# 12-point curve computed from that row's mu and lambda.
df <- df %>%
  rowwise() %>%
  mutate(
    y1 = round(gompertz(1:12, 100, mu, lambda), 2)[1],
    y2 = round(gompertz(1:12, 100, mu, lambda), 2)[2],
    y3 = round(gompertz(1:12, 100, mu, lambda), 2)[3],
    y4 = round(gompertz(1:12, 100, mu, lambda), 2)[4],
    y5 = round(gompertz(1:12, 100, mu, lambda), 2)[5]
  )
对 y6 至 y12 重复同样的写法,即可得到相同的结果。
您可以使用 lapply()
:
library(grofit)
# One rounded 12-point curve per row of df, stacked into an nrow(df) x 12
# matrix. seq_len() replaces 1:nrow(df) so a zero-row df produces no
# iterations instead of looping over c(1, 0); mu / lambda are read by name
# rather than by column position.
df2 <- do.call(rbind, lapply(seq_len(nrow(df)), function(i) {
  round(gompertz(1:12, 100, df$mu[i], df$lambda[i]), digits = 2)
}))
# Append the curve columns to the right of the original columns.
df3 <- cbind(df, df2)
结果:
> head(df3)
location year mu lambda 1 2 3 4 5 6 7 8 9 10 11 12
1 1 1980 43.04565 4.536717 0 0 0 0.61 20.58 61.23 85.88 95.39 98.54 99.55 99.86 99.96
2 1 1981 39.00524 4.505235 0 0 0 0.96 20.02 57.28 82.45 93.53 97.71 99.20 99.72 99.90
3 1 1982 41.60206 4.619627 0 0 0 0.42 17.07 56.52 83.18 94.23 98.10 99.38 99.80 99.94
4 1 1983 42.01069 4.689058 0 0 0 0.26 14.87 54.43 82.35 93.99 98.04 99.37 99.80 99.94
5 1 1984 40.34275 4.692595 0 0 0 0.30 14.36 52.30 80.54 93.03 97.61 99.20 99.73 99.91
6 1 1985 41.13246 4.641404 0 0 0 0.38 16.20 55.15 82.32 93.84 97.94 99.32 99.78 99.93
数据:
# Fixed seed so the random draws below are reproducible.
set.seed(47)
# 1000 locations x 36 years (1980-2015); `mu` is drawn before `lambda`.
df <- data.frame(
  location = rep(1:1000, each = 36),
  year     = rep(1980:2015, times = 1000),
  mu       = runif(n = 1000 * 36, min = 36.5, max = 43.2),
  lambda   = runif(n = 1000 * 36, min = 4.5, max = 4.8)
)
这是一个 tidyverse
解决方案:
df <- head(df) # we'll work on a sample
library(tidyverse)
# Build a list-column holding one rounded 12-point curve per row, name its
# elements y1..y12, then spread it into 12 columns. This avoids the deprecated
# argument-less unnest(), the superseded rename_at(), and as_tibble() on a
# matrix without column names.
df %>%
  mutate(
    y = map2(mu, lambda, gompertz, time = 1:12, A = 100),
    y = map(y, function(v) setNames(round(v, 2), paste0("y", 1:12)))
  ) %>%
  unnest_wider(y)
# location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
# 1 1 1980 38.52133 4.793232 0 0 0 0.20 11.20 46.38 76.37 90.97 96.73 98.84 99.59 99.86
# 2 1 1981 41.05032 4.668713 0 0 0 0.32 15.29 54.04 81.74 93.61 97.86 99.29 99.77 99.92
# 3 1 1982 36.76366 4.687794 0 0 0 0.45 13.67 48.07 76.37 90.55 96.41 98.66 99.51 99.82
# 4 1 1983 42.47994 4.766380 0 0 0 0.14 12.55 51.99 81.37 93.71 97.97 99.36 99.80 99.94
# 5 1 1984 36.58161 4.510503 0 0 0 1.09 18.81 53.90 79.56 91.89 96.92 98.85 99.57 99.84
# 6 1 1985 41.77695 4.705588 0 0 0 0.23 14.29 53.52 81.81 93.75 97.95 99.34 99.79 99.93
以及一个运行起来应该更快的 base R 版本:
# mapply() evaluates gompertz() for every (mu, lambda) pair with fixed
# time = 1:12 and A = 100; the result is rounded, transposed so each input row
# becomes one output row, and bound to the right of df.
new_df <- cbind(
  df,
  t(round(
    mapply(gompertz, df$mu, df$lambda, MoreArgs = list(time = 1:12, A = 100)),
    digits = 2
  ))
)
# Name the 12 appended columns y1..y12.
names(new_df)[5:16] <- sprintf("y%d", 1:12)
# location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
# 1 1 1980 38.52133 4.793232 0 0 0 0.20 11.20 46.38 76.37 90.97 96.73 98.84 99.59 99.86
# 2 1 1981 41.05032 4.668713 0 0 0 0.32 15.29 54.04 81.74 93.61 97.86 99.29 99.77 99.92
# 3 1 1982 36.76366 4.687794 0 0 0 0.45 13.67 48.07 76.37 90.55 96.41 98.66 99.51 99.82
# 4 1 1983 42.47994 4.766380 0 0 0 0.14 12.55 51.99 81.37 93.71 97.97 99.36 99.80 99.94
# 5 1 1984 36.58161 4.510503 0 0 0 1.09 18.81 53.90 79.56 91.89 96.92 98.85 99.57 99.84
# 6 1 1985 41.77695 4.705588 0 0 0 0.23 14.29 53.52 81.81 93.75 97.95 99.34 99.79 99.93
除 mapply 之外,还有一个不常用的替代方法是 Vectorize;在这种情况下,我认为使用它是合理的,因为这个函数看起来本来就应该是向量化的。
# Wrap gompertz() so it is vectorized over its `mu` and `lambda` arguments.
gompertz2 <- Vectorize(gompertz, vectorize.args = c("mu", "lambda"))
# Evaluate all rows at once, round, transpose to one row per record, and bind
# the result to the right of df.
new_df <- cbind(
  df,
  t(round(
    gompertz2(time = 1:12, A = 100, mu = df$mu, lambda = df$lambda),
    digits = 2
  ))
)
# Name the 12 appended columns y1..y12.
names(new_df)[5:16] <- sprintf("y%d", 1:12)
# same output