使用 rlang 列出 purrr::pmap 中的变量
using rlang to list variables inside purrr::pmap
我正在尝试编写一个名为 grouped_lm,
的函数,它基本上是针对多个 [=62] 的分组变量 (grouping.vars
) 组合的每个级别的 运行s 线性回归模型=] (crit.vars
) 和 predictor/independent 变量 (pred.vars
)。
这样做的方式是,第一次进入 crit.vars
会回归到 pred.vars
。
例如,如果我输入 grouping.vars = am
、crit.vars = c(mpg, drat)
和 crit.vars = c(wt, disp)
(在 mtcars
数据集的上下文中),该函数将 运行 两个回归模型(mpg ~ wt
和 drat ~ disp
) 对于每个级别的分组变量 am
(am = 0
和 am = 1
)。
我设法从输入的变量创建了一个数据框,编写了一个 运行 线性回归模型的自定义函数,但似乎无法弄清楚如何使用 rlang
来输入将输入 purrr::pmap
的列表元素中的变量。
对于冗长的问题深表歉意,并提前感谢您提供的任何帮助。
# libraries needed
library(tidyverse)
library(plyr)
# function definition
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = list(!!!pred.vars),
y_name = list(!!!crit.vars)
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
# example usage of the function
grouped_lm(
data = iris,
crit.vars = c(Sepal.Length, Petal.Length),
pred.vars = c(Sepal.Width, Petal.Width),
grouping.vars = Species
)
#> the entire nested dataframe:
#> # A tibble: 3 x 2
#> Species data
#> <fct> <list>
#> 1 setosa <tibble [50 x 4]>
#> 2 versicolor <tibble [50 x 4]>
#> 3 virginica <tibble [50 x 4]>
#> first element of the list column from nested dataframe:
#> # A tibble: 50 x 4
#> Sepal.Length Petal.Length Sepal.Width Petal.Width
#> <dbl> <dbl> <dbl> <dbl>
#> 1 5.10 1.40 3.50 0.200
#> 2 4.90 1.40 3.00 0.200
#> 3 4.70 1.30 3.20 0.200
#> 4 4.60 1.50 3.10 0.200
#> 5 5.00 1.40 3.60 0.200
#> 6 5.40 1.70 3.90 0.400
#> 7 4.60 1.40 3.40 0.300
#> 8 5.00 1.50 3.40 0.200
#> 9 4.40 1.40 2.90 0.200
#> 10 4.90 1.50 3.10 0.100
#> # ... with 40 more rows
#> the custom function seems to be working!:
#> # A tibble: 4 x 7
#> group formula term estimate std.error t p.value
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 0 scale(mpg) ~ scale(wt) scale(~ -0.768 0.155 -4.94 1.25e-4
#> 2 1 scale(mpg) ~ scale(wt) scale(~ -0.909 0.126 -7.23 1.69e-5
#> 3 0 scale(drat) ~ scale(disp) scale(~ -0.614 0.192 -3.20 5.20e-3
#> 4 1 scale(drat) ~ scale(disp) scale(~ -0.305 0.287 -1.06 3.12e-1
#> running the custom function on the entered dataframe:
#> Error in !pred.vars: invalid argument type
由 reprex package (v0.2.0) 创建于 2018-03-23。
提供答案后编辑
我还想知道如何在每个分组变量的输出中获得单独的列。因此,根据下面提供的答案,如果我 运行-
grouped_lm(
data = mtcars,
crit.vars = c(wt, mpg),
pred.vars = c(drat, disp),
grouping.vars = c(am, cyl)
)
它有效,但输出看起来像这样:
从图中可以看出,1到6的值代表什么,一点也不清楚。因此,最好为提供的每个分组变量也获得一个单独的列,这样,在本例中,将有两列 am
和 cyl
,每个 lm
都有各自的级别型号。
(我手动创建了这个数据框。这不是分组的方式,但这只是为了展示所需的输出。)
如果我们需要复制与 'mtcars' 示例用法相同的行为,其中 x_name
和 y_name
是字符串而不是 symbols
(这是'pred.vars' 和 'crit.vars' 的情况下),将它们转换为带有 quo_name
的字符串,即
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = map(pred.vars, quo_name),
y_name = map(crit.vars, quo_name)
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
或作为 symbol
传递而不进行任何评估,即 !!
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars, ###
y_name = crit.vars ###
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
这与 lm_listed
函数如何接受参数有关。将对象视为字符串
sl <- "Sepal.Length"
sw <- "Sepal.Width"
和glue
returns 正确
glue::glue("scale({sl}) ~ scale({sw})")
#scale(Sepal.Length) ~ scale(Sepal.Width)
现在,我们将其更改为 symbol
,它也有效
sl <- rlang::sym("Sepal.Length")
sw <- rlang::sym("Sepal.Width")
glue::glue("scale({sl}) ~ scale({sw})")
#scale(Sepal.Length) ~ scale(Sepal.Width)
但是,问题在于使用 !!
来评估作为输入参数传递的
sl <- !!rlang::sym("Sepal.Length")
Error in !rlang::sym("Sepal.Length") : invalid argument type
!!
在 tidyverse
函数的环境之外求值,导致错误
-完整代码
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars,
y_name = crit.vars
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
-运行函数
res <- grouped_lm(
data = iris,
crit.vars = c(Sepal.Length, Petal.Length),
pred.vars = c(Sepal.Width, Petal.Width),
grouping.vars = Species
)
-输出打印
#the entire nested dataframe:
# A tibble: 3 x 2
# Species data
# <fctr> <list>
#1 setosa <tibble [50 x 4]>
#2 versicolor <tibble [50 x 4]>
#3 virginica <tibble [50 x 4]>
#first element of the list column from nested dataframe:
# A tibble: 50 x 4
# Sepal.Length Petal.Length Sepal.Width Petal.Width
# <dbl> <dbl> <dbl> <dbl>
# 1 5.10 1.40 3.50 0.200
# 2 4.90 1.40 3.00 0.200
# 3 4.70 1.30 3.20 0.200
# 4 4.60 1.50 3.10 0.200
# 5 5.00 1.40 3.60 0.200
# 6 5.40 1.70 3.90 0.400
# 7 4.60 1.40 3.40 0.300
# 8 5.00 1.50 3.40 0.200
# 9 4.40 1.40 2.90 0.200
#10 4.90 1.50 3.10 0.100
# ... with 40 more rows
#the custom function seems to be working!:
# A tibble: 4 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 0 scale(mpg) ~ scale(wt) scale(wt) -0.768 0.155 -4.94 0.000125
#2 1 scale(mpg) ~ scale(wt) scale(wt) -0.909 0.126 -7.23 0.0000169
#3 0 scale(drat) ~ scale(disp) scale(disp) -0.614 0.192 -3.20 0.00520
#4 1 scale(drat) ~ scale(disp) scale(disp) -0.305 0.287 -1.06 0.312
#running the custom function on the entered dataframe:
# A tibble: 6 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 1 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.743 0.0967 7.68 0.000000000671
#2 2 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.526 0.123 4.28 0.0000877
#3 3 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.457 0.128 3.56 0.000843
#4 1 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.332 0.136 2.44 0.0186
#5 2 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.787 0.0891 8.83 0.0000000000127
#6 3 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.322 0.137 2.36 0.0225
-结果输出
res
# A tibble: 6 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 1 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.743 0.0967 7.68 0.000000000671
#2 2 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.526 0.123 4.28 0.0000877
#3 3 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.457 0.128 3.56 0.000843
#4 1 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.332 0.136 2.44 0.0186
#5 2 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.787 0.0891 8.83 0.0000000000127
#6 3 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.322 0.137 2.36 0.0225
如果我们需要在输出中也包含 'grouping.vars'
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df <- df %>%
tibble::rownames_to_column('group')
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars,
y_name = crit.vars
),
.f = lm_listed) %>%
dplyr::bind_rows() %>%
left_join(df) %>%
select(!!!grouping.vars, everything()) %>%
select(-group, -data)
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
-运行函数
r1 <- grouped_lm(
data = mtcars,
crit.vars = c(wt, mpg),
pred.vars = c(drat, disp),
grouping.vars = c(am, cyl)
)
-输出
r1
# A tibble: 12 x 8
# am cyl formula term estimate std.error t p.value
# <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 1.00 6.00 scale(wt) ~ scale(drat) scale(drat) -0.101 0.995 - 0.102 0.935
# 2 1.00 4.00 scale(wt) ~ scale(drat) scale(drat) -0.226 0.398 - 0.568 0.591
# 3 0 6.00 scale(wt) ~ scale(drat) scale(drat) 0.307 0.673 0.456 0.693
# 4 0 8.00 scale(wt) ~ scale(drat) scale(drat) -0.119 0.314 - 0.379 0.713
# 5 0 4.00 scale(wt) ~ scale(drat) scale(drat) 0.422 0.906 0.466 0.722
# 6 1.00 8.00 scale(wt) ~ scale(drat) scale(drat) -1.00 NaN NaN NaN
# 7 1.00 6.00 scale(mpg) ~ scale(disp) scale(disp) 1.00 0 Inf 0
# 8 1.00 4.00 scale(mpg) ~ scale(disp) scale(disp) -0.835 0.225 - 3.72 0.00991
# 9 0 6.00 scale(mpg) ~ scale(disp) scale(disp) 0.670 0.525 1.28 0.330
#10 0 8.00 scale(mpg) ~ scale(disp) scale(disp) -0.535 0.267 - 2.00 0.0729
#11 0 4.00 scale(mpg) ~ scale(disp) scale(disp) 0.932 0.362 2.57 0.236
#12 1.00 8.00 scale(mpg) ~ scale(disp) scale(disp) 1.00 NaN NaN NaN
我正在尝试编写一个名为 grouped_lm,
的函数,它基本上是针对多个 [=62] 的分组变量 (grouping.vars
) 组合的每个级别的 运行s 线性回归模型=] (crit.vars
) 和 predictor/independent 变量 (pred.vars
)。
这样做的方式是,第一次进入 crit.vars
会回归到 pred.vars
。
例如,如果我输入 grouping.vars = am
、crit.vars = c(mpg, drat)
和 crit.vars = c(wt, disp)
(在 mtcars
数据集的上下文中),该函数将 运行 两个回归模型(mpg ~ wt
和 drat ~ disp
) 对于每个级别的分组变量 am
(am = 0
和 am = 1
)。
我设法从输入的变量创建了一个数据框,编写了一个 运行 线性回归模型的自定义函数,但似乎无法弄清楚如何使用 rlang
来输入将输入 purrr::pmap
的列表元素中的变量。
对于冗长的问题深表歉意,并提前感谢您提供的任何帮助。
# libraries needed
library(tidyverse)
library(plyr)
# function definition
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = list(!!!pred.vars),
y_name = list(!!!crit.vars)
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
# example usage of the function
grouped_lm(
data = iris,
crit.vars = c(Sepal.Length, Petal.Length),
pred.vars = c(Sepal.Width, Petal.Width),
grouping.vars = Species
)
#> the entire nested dataframe:
#> # A tibble: 3 x 2
#> Species data
#> <fct> <list>
#> 1 setosa <tibble [50 x 4]>
#> 2 versicolor <tibble [50 x 4]>
#> 3 virginica <tibble [50 x 4]>
#> first element of the list column from nested dataframe:
#> # A tibble: 50 x 4
#> Sepal.Length Petal.Length Sepal.Width Petal.Width
#> <dbl> <dbl> <dbl> <dbl>
#> 1 5.10 1.40 3.50 0.200
#> 2 4.90 1.40 3.00 0.200
#> 3 4.70 1.30 3.20 0.200
#> 4 4.60 1.50 3.10 0.200
#> 5 5.00 1.40 3.60 0.200
#> 6 5.40 1.70 3.90 0.400
#> 7 4.60 1.40 3.40 0.300
#> 8 5.00 1.50 3.40 0.200
#> 9 4.40 1.40 2.90 0.200
#> 10 4.90 1.50 3.10 0.100
#> # ... with 40 more rows
#> the custom function seems to be working!:
#> # A tibble: 4 x 7
#> group formula term estimate std.error t p.value
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 0 scale(mpg) ~ scale(wt) scale(~ -0.768 0.155 -4.94 1.25e-4
#> 2 1 scale(mpg) ~ scale(wt) scale(~ -0.909 0.126 -7.23 1.69e-5
#> 3 0 scale(drat) ~ scale(disp) scale(~ -0.614 0.192 -3.20 5.20e-3
#> 4 1 scale(drat) ~ scale(disp) scale(~ -0.305 0.287 -1.06 3.12e-1
#> running the custom function on the entered dataframe:
#> Error in !pred.vars: invalid argument type
由 reprex package (v0.2.0) 创建于 2018-03-23。
提供答案后编辑
我还想知道如何在每个分组变量的输出中获得单独的列。因此,根据下面提供的答案,如果我 运行-
grouped_lm(
data = mtcars,
crit.vars = c(wt, mpg),
pred.vars = c(drat, disp),
grouping.vars = c(am, cyl)
)
它有效,但输出看起来像这样:
从图中可以看出,1到6的值代表什么,一点也不清楚。因此,最好为提供的每个分组变量也获得一个单独的列,这样,在本例中,将有两列 am
和 cyl
,每个 lm
都有各自的级别型号。
(我手动创建了这个数据框。这不是分组的方式,但这只是为了展示所需的输出。)
如果我们需要复制与 'mtcars' 示例用法相同的行为,其中 x_name
和 y_name
是字符串而不是 symbols
(这是'pred.vars' 和 'crit.vars' 的情况下),将它们转换为带有 quo_name
的字符串,即
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = map(pred.vars, quo_name),
y_name = map(crit.vars, quo_name)
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
或作为 symbol
传递而不进行任何评估,即 !!
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars, ###
y_name = crit.vars ###
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
这与 lm_listed
函数如何接受参数有关。将对象视为字符串
sl <- "Sepal.Length"
sw <- "Sepal.Width"
和glue
returns 正确
glue::glue("scale({sl}) ~ scale({sw})")
#scale(Sepal.Length) ~ scale(Sepal.Width)
现在,我们将其更改为 symbol
,它也有效
sl <- rlang::sym("Sepal.Length")
sw <- rlang::sym("Sepal.Width")
glue::glue("scale({sl}) ~ scale({sw})")
#scale(Sepal.Length) ~ scale(Sepal.Width)
但是,问题在于使用 !!
来评估作为输入参数传递的
sl <- !!rlang::sym("Sepal.Length")
Error in !rlang::sym("Sepal.Length") : invalid argument type
!!
在 tidyverse
函数的环境之外求值,导致错误
-完整代码
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars,
y_name = crit.vars
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
-运行函数
res <- grouped_lm(
data = iris,
crit.vars = c(Sepal.Length, Petal.Length),
pred.vars = c(Sepal.Width, Petal.Width),
grouping.vars = Species
)
-输出打印
#the entire nested dataframe:
# A tibble: 3 x 2
# Species data
# <fctr> <list>
#1 setosa <tibble [50 x 4]>
#2 versicolor <tibble [50 x 4]>
#3 virginica <tibble [50 x 4]>
#first element of the list column from nested dataframe:
# A tibble: 50 x 4
# Sepal.Length Petal.Length Sepal.Width Petal.Width
# <dbl> <dbl> <dbl> <dbl>
# 1 5.10 1.40 3.50 0.200
# 2 4.90 1.40 3.00 0.200
# 3 4.70 1.30 3.20 0.200
# 4 4.60 1.50 3.10 0.200
# 5 5.00 1.40 3.60 0.200
# 6 5.40 1.70 3.90 0.400
# 7 4.60 1.40 3.40 0.300
# 8 5.00 1.50 3.40 0.200
# 9 4.40 1.40 2.90 0.200
#10 4.90 1.50 3.10 0.100
# ... with 40 more rows
#the custom function seems to be working!:
# A tibble: 4 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 0 scale(mpg) ~ scale(wt) scale(wt) -0.768 0.155 -4.94 0.000125
#2 1 scale(mpg) ~ scale(wt) scale(wt) -0.909 0.126 -7.23 0.0000169
#3 0 scale(drat) ~ scale(disp) scale(disp) -0.614 0.192 -3.20 0.00520
#4 1 scale(drat) ~ scale(disp) scale(disp) -0.305 0.287 -1.06 0.312
#running the custom function on the entered dataframe:
# A tibble: 6 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 1 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.743 0.0967 7.68 0.000000000671
#2 2 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.526 0.123 4.28 0.0000877
#3 3 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.457 0.128 3.56 0.000843
#4 1 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.332 0.136 2.44 0.0186
#5 2 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.787 0.0891 8.83 0.0000000000127
#6 3 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.322 0.137 2.36 0.0225
-结果输出
res
# A tibble: 6 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 1 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.743 0.0967 7.68 0.000000000671
#2 2 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.526 0.123 4.28 0.0000877
#3 3 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.457 0.128 3.56 0.000843
#4 1 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.332 0.136 2.44 0.0186
#5 2 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.787 0.0891 8.83 0.0000000000127
#6 3 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.322 0.137 2.36 0.0225
如果我们需要在输出中也包含 'grouping.vars'
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df <- df %>%
tibble::rownames_to_column('group')
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars,
y_name = crit.vars
),
.f = lm_listed) %>%
dplyr::bind_rows() %>%
left_join(df) %>%
select(!!!grouping.vars, everything()) %>%
select(-group, -data)
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
-运行函数
r1 <- grouped_lm(
data = mtcars,
crit.vars = c(wt, mpg),
pred.vars = c(drat, disp),
grouping.vars = c(am, cyl)
)
-输出
r1
# A tibble: 12 x 8
# am cyl formula term estimate std.error t p.value
# <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 1.00 6.00 scale(wt) ~ scale(drat) scale(drat) -0.101 0.995 - 0.102 0.935
# 2 1.00 4.00 scale(wt) ~ scale(drat) scale(drat) -0.226 0.398 - 0.568 0.591
# 3 0 6.00 scale(wt) ~ scale(drat) scale(drat) 0.307 0.673 0.456 0.693
# 4 0 8.00 scale(wt) ~ scale(drat) scale(drat) -0.119 0.314 - 0.379 0.713
# 5 0 4.00 scale(wt) ~ scale(drat) scale(drat) 0.422 0.906 0.466 0.722
# 6 1.00 8.00 scale(wt) ~ scale(drat) scale(drat) -1.00 NaN NaN NaN
# 7 1.00 6.00 scale(mpg) ~ scale(disp) scale(disp) 1.00 0 Inf 0
# 8 1.00 4.00 scale(mpg) ~ scale(disp) scale(disp) -0.835 0.225 - 3.72 0.00991
# 9 0 6.00 scale(mpg) ~ scale(disp) scale(disp) 0.670 0.525 1.28 0.330
#10 0 8.00 scale(mpg) ~ scale(disp) scale(disp) -0.535 0.267 - 2.00 0.0729
#11 0 4.00 scale(mpg) ~ scale(disp) scale(disp) 0.932 0.362 2.57 0.236
#12 1.00 8.00 scale(mpg) ~ scale(disp) scale(disp) 1.00 NaN NaN NaN