迭代 dplyR 中的字符向量 summarize 并使用它来分配新的列名
Iterate over character vector in dplyR summarise and use it to assign new column names
我有一个带有变量名称的字符向量:
variables -> c("w", "x", "y", "z")
我需要创建一个函数来计算指定参数(如下所示的 alpha)的每个变量的平均值。但是,它不会使用迭代变量名称重命名列,也不会将 alpha
列减少到左侧的一列。
calc <- function(df,
parameter,
iteration,
variables){
variable <- sym(variables[iteration])
mean <- df %>% group_by(.dots = parameter) %>%
summarise(variable = mean(!!variable),sd_variable = sd(!!variable))
return(mean)
}
means <- map_dfc(1:length(variables), ~calc(df = input,
parameter = "alpha",
iteration = .,
variables = variables))
理想情况下,输出 df (means
) 应如下所示:
alpha | w | sd_w | x | sd_x | y | sd_y | z | sd_z |
这是输入 df 的样子:
structure(list(time = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 999.5, 999.6,
999.7, 999.8, 999.9, 1000), w = c(10, 10.0057192322758, 10.0198266325956,
10.040096099625, 10.0637654242843, 10.087779652849, 0.889708853982268,
0.890916575744663, 0.892121389863897, 0.89332329218135, 0.894522278550115,
0.895718344834999), x = c(10, 11.0467963604334, 12.1782709261765,
13.3728962503142, 14.6035317074526, 15.8398164069251, 62.6631746231113,
62.6583134156356, 62.6534565303638, 62.648604016965, 62.6437559251575,
62.6389123047088), y = c(10, 9.89605687874935, 9.59253574727296,
9.11222320249057, 8.48917353431654, 7.76447036695841, 0.00833796964522317,
0.00835876233547079, 0.00837957883570158, 0.00840041916631544,
0.00842128334742553, 0.00844217139885453), z = c(10, 9.05439359565339,
8.21533762023494, 7.48379901688836, 6.85562632179817, 6.3231517466183,
-7.50539460838544, -7.48234149534558, -7.45927733670319, -7.43620225192078,
-7.41311636057114, -7.39001978233681), alpha = c(0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), beta = c(0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), eta = c(0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), zeta = c(0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), lambda = c(0.95,
0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95
), phi = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), kappa = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), ode_outputs..iteration.. = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c("1",
"1.1", "1.2", "1.3", "1.4", "1.5", "3.9995", "3.9996", "3.9997",
"3.9998", "3.9999", "3.10000"), class = "data.frame")
理想情况下,该函数将使用 dplyr and/or baseR。
如果我的理解正确,则无需遍历列。都可以直接在dplyr
...
中完成
library(tidyverse)
df %>%
group_by(alpha) %>%
summarise(
across(
c(w, x, y, z),
list(mean=mean, sd=sd)
),
.groups="drop"
) %>%
rename_with(function(x) str_sub(x,1,1), ends_with("mean"))
# A tibble: 2 x 9
alpha w w_sd x x_sd y y_sd z z_sd
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.1 10.0 0.0345 12.8 2.20 9.14 0.875 7.99 1.38
2 0.5 0.893 0.00225 62.7 0.00908 0.00839 0.0000390 -7.45 0.0432
我有一个带有变量名称的字符向量:
variables -> c("w", "x", "y", "z")
我需要创建一个函数来计算指定参数(如下所示的 alpha)的每个变量的平均值。但是,它不会使用迭代变量名称重命名列,也不会将 alpha
列减少到左侧的一列。
calc <- function(df,
parameter,
iteration,
variables){
variable <- sym(variables[iteration])
mean <- df %>% group_by(.dots = parameter) %>%
summarise(variable = mean(!!variable),sd_variable = sd(!!variable))
return(mean)
}
means <- map_dfc(1:length(variables), ~calc(df = input,
parameter = "alpha",
iteration = .,
variables = variables))
理想情况下,输出 df (means
) 应如下所示:
alpha | w | sd_w | x | sd_x | y | sd_y | z | sd_z |
这是输入 df 的样子:
structure(list(time = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 999.5, 999.6,
999.7, 999.8, 999.9, 1000), w = c(10, 10.0057192322758, 10.0198266325956,
10.040096099625, 10.0637654242843, 10.087779652849, 0.889708853982268,
0.890916575744663, 0.892121389863897, 0.89332329218135, 0.894522278550115,
0.895718344834999), x = c(10, 11.0467963604334, 12.1782709261765,
13.3728962503142, 14.6035317074526, 15.8398164069251, 62.6631746231113,
62.6583134156356, 62.6534565303638, 62.648604016965, 62.6437559251575,
62.6389123047088), y = c(10, 9.89605687874935, 9.59253574727296,
9.11222320249057, 8.48917353431654, 7.76447036695841, 0.00833796964522317,
0.00835876233547079, 0.00837957883570158, 0.00840041916631544,
0.00842128334742553, 0.00844217139885453), z = c(10, 9.05439359565339,
8.21533762023494, 7.48379901688836, 6.85562632179817, 6.3231517466183,
-7.50539460838544, -7.48234149534558, -7.45927733670319, -7.43620225192078,
-7.41311636057114, -7.39001978233681), alpha = c(0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), beta = c(0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), eta = c(0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), zeta = c(0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), lambda = c(0.95,
0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95
), phi = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), kappa = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), ode_outputs..iteration.. = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c("1",
"1.1", "1.2", "1.3", "1.4", "1.5", "3.9995", "3.9996", "3.9997",
"3.9998", "3.9999", "3.10000"), class = "data.frame")
理想情况下,该函数将使用 dplyr and/or baseR。
如果我的理解正确,则无需遍历列。都可以直接在dplyr
...
library(tidyverse)
df %>%
group_by(alpha) %>%
summarise(
across(
c(w, x, y, z),
list(mean=mean, sd=sd)
),
.groups="drop"
) %>%
rename_with(function(x) str_sub(x,1,1), ends_with("mean"))
# A tibble: 2 x 9
alpha w w_sd x x_sd y y_sd z z_sd
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.1 10.0 0.0345 12.8 2.20 9.14 0.875 7.99 1.38
2 0.5 0.893 0.00225 62.7 0.00908 0.00839 0.0000390 -7.45 0.0432