使用列表 运行 一组代码进行多项统计测试
Using lists to run multiple statistics test with one set of code
我有兴趣使用列表 运行 通过一组代码进行多项统计测试。
例如,我想 运行 glm() 测试根据数据框/列表中的行在 DV、IV、数据和系列方面有所不同。我可以长期执行此操作,并且可以使用 lapply() 以“中等方式”执行此操作,以便我可以更改测试中使用的 DV。但我想知道是否有一种方法{最好使用 lapply()} 以更少的代码和更 automated/iterative 的方式完成此任务。
对于示例数据,我使用 ggplot2::diamonds 数据和以下代码创建了 2 个数据集:
### for dataset with top 300 rows
# ---- NOTE: selects only the top 300 rows of the dataset
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)
### for dataset with bottom 300 rows
# ---- NOTE: selects only the bottom 300 rows of the dataset
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)
然后我使用这些数据创建了一个包含列表信息的数据框,并得到了这些结果:
## creates df with variable info
model_variable_df <-
data.frame(
cbind(
DV_name = c("carat", "depth", "price"),
DV_label = c("carat size", "depth size", "diamond price"),
dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
family = c("poisson", "poisson", "gaussian")
)
)
> model_variable_df
DV_name DV_label dataset_name IV_name family
1 carat carat size diamonds_bottom300 x + y + color poisson
2 depth depth size diamonds_bottom300 x + y + clarity poisson
3 price diamond price diamonds_top300 x + z + color gaussian
我可以使用 long 方法完成我的任务:
## long form of 3 models
### creates first model
freq_glm_poisson_carat <-
(
glm(
carat ~ x + y + color,
data = diamonds_bottom300,
family = poisson()
)
)
### creates 2nd model
freq_glm_binomial_depth <-
glm(
depth ~ x + y + clarity,
data = diamonds_bottom300,
family= poisson()
)
### creates 3rd model
freq_glm_gaussian_price <-
glm(
price ~ x + z + color,
data = diamonds_top300,
family= gaussian()
)
我也可以使用媒介方法来完成更具体和有限的 DV_name
任务。
## model that uses lapply, and just values DV
# ---- NOTE: DV_name is the only thing that changes
# ---- NOTE: IVs / effects (fixed and random) are the same as x + y + color, or model 1
# ---- NOTE: data = diamonds_top300 only
# ---- NOTE: family = poisson() only
# ---- NOTE: creates list object
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <-
lapply(model_variable_df$DV_name,
function(DV_list) wrapr::let(
c(DV_col = DV_list,
dataset_obj = "diamonds_top300"),
glm(
DV_col ~ x + y + color,
data = dataset_obj,
family = poisson()
)
)
)
# ---- NOTE: changes list object name
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <-
setNames(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, paste("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter",
model_variable_df$DV_name,
sep = "__")
)
# ---- NOTE: creates unique objects for each part list object
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: gathers objects with prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")
有什么方法可以让我使用 (1) 更少的代码和 (2) 更多的代码来完成这项任务 iteration/automation?非常感谢任何帮助。
仅供参考,我在 2013 Intel Macbook Pro 上使用 RStudio。
谢谢。
练习代码:
# sets up data
## Loads packages
# ---- NOTE: making plots and diamonds dataset
if(!require(ggplot2)){install.packages("ggplot2")}
# ---- NOTE: run mixed effects models
if(!require(lme4)){install.packages("lme4")}
# ---- NOTE: for data wrangling
if(!require(dplyr)){install.packages("dplyr")}
# ---- NOTE: for iteration
if(!require(wrapr)){install.packages("wrapr")}
## dataset creation
### for dataset with top 300 rows
# ---- NOTE: selects only the top 300 rows of the dataset
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)
### for dataset with bottom 300 rows
# ---- NOTE: selects only the bottom 300 rows of the dataset
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)
## creates df with variable info
model_variable_df <-
data.frame(
cbind(
DV_name = c("carat", "depth", "price"),
DV_label = c("carat size", "depth size", "diamond price"),
dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
family = c("poisson", "poisson", "gaussian")
)
)
## long for of 3 models
### creates first model
freq_glm_poisson_carat <-
(
glm(
carat ~ x + y + color,
data = diamonds_bottom300,
family = poisson()
)
)
### creates 2nd model
freq_glm_binomial_depth <-
glm(
depth ~ x + y + clarity,
data = diamonds_bottom300,
family= poisson()
)
### creates 3rd model
freq_glm_gaussian_price <-
glm(
price ~ x + z + color,
data = diamonds_top300,
family= gaussian()
)
## model that uses lapply, and just values DV
# ---- NOTE: DV_name is the only thing that changes
# ---- NOTE: IVs / effects (fixed and random) are the same as x + y + color, or model 1
# ---- NOTE: data = diamonds_top300 only
# ---- NOTE: family = poisson() only
# ---- NOTE: creates list object
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <-
lapply(model_variable_df$DV_name,
function(DV_list) wrapr::let(
c(DV_col = DV_list,
dataset_obj = "diamonds_top300"),
glm(
DV_col ~ x + y + color,
data = dataset_obj,
family = poisson()
)
)
)
# ---- NOTE: changes list object name
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <-
setNames(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, paste("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter",
model_variable_df$DV_name,
sep = "__")
)
# ---- NOTE: creates unique objects for each part list object
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: gathers objects with prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")
您可以使用 lapply
执行此操作:
lapply(seq(nrow(model_variable_df)), function(i) {
val <- model_variable_df[i, ]
glm(as.formula(paste(val$DV_name, val$IV_name, sep = '~')),
data = get(val$dataset_name), family = val$family)
}) -> model_list
as.formula
用于将字符串转换为公式,get
用于从字符串值中获取数据集。
我有兴趣使用列表 运行 通过一组代码进行多项统计测试。
例如,我想 运行 glm() 测试根据数据框/列表中的行在 DV、IV、数据和系列方面有所不同。我可以长期执行此操作,并且可以使用 lapply() 以“中等方式”执行此操作,以便我可以更改测试中使用的 DV。但我想知道是否有一种方法{最好使用 lapply()} 以更少的代码和更 automated/iterative 的方式完成此任务。
对于示例数据,我使用 ggplot2::diamonds 数据和以下代码创建了 2 个数据集:
### for dataset with top 300 rows
# ---- NOTE: selects only the top 300 rows of the dataset
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)
### for dataset with bottom 300 rows
# ---- NOTE: selects only the bottom 300 rows of the dataset
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)
然后我使用这些数据创建了一个包含列表信息的数据框,并得到了这些结果:
## creates df with variable info
model_variable_df <-
data.frame(
cbind(
DV_name = c("carat", "depth", "price"),
DV_label = c("carat size", "depth size", "diamond price"),
dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
family = c("poisson", "poisson", "gaussian")
)
)
> model_variable_df
DV_name DV_label dataset_name IV_name family
1 carat carat size diamonds_bottom300 x + y + color poisson
2 depth depth size diamonds_bottom300 x + y + clarity poisson
3 price diamond price diamonds_top300 x + z + color gaussian
我可以使用 long 方法完成我的任务:
## long form of 3 models
### creates first model
freq_glm_poisson_carat <-
(
glm(
carat ~ x + y + color,
data = diamonds_bottom300,
family = poisson()
)
)
### creates 2nd model
freq_glm_binomial_depth <-
glm(
depth ~ x + y + clarity,
data = diamonds_bottom300,
family= poisson()
)
### creates 3rd model
freq_glm_gaussian_price <-
glm(
price ~ x + z + color,
data = diamonds_top300,
family= gaussian()
)
我也可以使用媒介方法来完成更具体和有限的 DV_name
任务。
## model that uses lapply, and just values DV
# ---- NOTE: DV_name is the only thing that changes
# ---- NOTE: IVs / effects (fixed and random) are the same as x + y + color, or model 1
# ---- NOTE: data = diamonds_top300 only
# ---- NOTE: family = poisson() only
# ---- NOTE: creates list object
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <-
lapply(model_variable_df$DV_name,
function(DV_list) wrapr::let(
c(DV_col = DV_list,
dataset_obj = "diamonds_top300"),
glm(
DV_col ~ x + y + color,
data = dataset_obj,
family = poisson()
)
)
)
# ---- NOTE: changes list object name
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <-
setNames(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, paste("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter",
model_variable_df$DV_name,
sep = "__")
)
# ---- NOTE: creates unique objects for each part list object
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: gathers objects with prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")
有什么方法可以让我使用 (1) 更少的代码和 (2) 更多的代码来完成这项任务 iteration/automation?非常感谢任何帮助。
仅供参考,我在 2013 Intel Macbook Pro 上使用 RStudio。
谢谢。
练习代码:
# sets up data
## Loads packages
# ---- NOTE: making plots and diamonds dataset
if(!require(ggplot2)){install.packages("ggplot2")}
# ---- NOTE: run mixed effects models
if(!require(lme4)){install.packages("lme4")}
# ---- NOTE: for data wrangling
if(!require(dplyr)){install.packages("dplyr")}
# ---- NOTE: for iteration
if(!require(wrapr)){install.packages("wrapr")}
## dataset creation
### for dataset with top 300 rows
# ---- NOTE: selects only the top 300 rows of the dataset
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)
### for dataset with bottom 300 rows
# ---- NOTE: selects only the bottom 300 rows of the dataset
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)
## creates df with variable info
model_variable_df <-
data.frame(
cbind(
DV_name = c("carat", "depth", "price"),
DV_label = c("carat size", "depth size", "diamond price"),
dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
family = c("poisson", "poisson", "gaussian")
)
)
## long for of 3 models
### creates first model
freq_glm_poisson_carat <-
(
glm(
carat ~ x + y + color,
data = diamonds_bottom300,
family = poisson()
)
)
### creates 2nd model
freq_glm_binomial_depth <-
glm(
depth ~ x + y + clarity,
data = diamonds_bottom300,
family= poisson()
)
### creates 3rd model
freq_glm_gaussian_price <-
glm(
price ~ x + z + color,
data = diamonds_top300,
family= gaussian()
)
## model that uses lapply, and just values DV
# ---- NOTE: DV_name is the only thing that changes
# ---- NOTE: IVs / effects (fixed and random) are the same as x + y + color, or model 1
# ---- NOTE: data = diamonds_top300 only
# ---- NOTE: family = poisson() only
# ---- NOTE: creates list object
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <-
lapply(model_variable_df$DV_name,
function(DV_list) wrapr::let(
c(DV_col = DV_list,
dataset_obj = "diamonds_top300"),
glm(
DV_col ~ x + y + color,
data = dataset_obj,
family = poisson()
)
)
)
# ---- NOTE: changes list object name
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <-
setNames(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, paste("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter",
model_variable_df$DV_name,
sep = "__")
)
# ---- NOTE: creates unique objects for each part list object
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: gathers objects with prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")
您可以使用 lapply
执行此操作:
lapply(seq(nrow(model_variable_df)), function(i) {
val <- model_variable_df[i, ]
glm(as.formula(paste(val$DV_name, val$IV_name, sep = '~')),
data = get(val$dataset_name), family = val$family)
}) -> model_list
as.formula
用于将字符串转换为公式,get
用于从字符串值中获取数据集。