在回归循环中寻找 AIC 和 R 平方
Finding AIC and R-square in regression loop
我发现了很多类似的问题,但我没有找到可以帮助我解决以下情况的问题。提前致谢,抱歉重复。这是我的数据:
data_model3 <- structure(list(Year = c(1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017, 2018, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
2017, 2018, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("mortality_rate_all_ages",
"mortality_rate_under1y", "mortality_rate_1to10y", "mortality_rate_10to20y",
"mortality_rate_20to30y", "mortality_rate_30to40y", "mortality_rate_40to50y",
"mortality_rate_50to60y", "mortality_rate_60to70y", "mortality_rate_70to80y",
"mortality_rate_80to90y", "mortality_rate_above90y"), class = "factor"),
value = c(0.0088, 0.0077, 0.0082, 0.0075, 0.0076, 0.0075,
0.0066, 0.0061, 0.0059, 0.0054, 0.0054, 0.0058, 0.0056, 0.006,
0.0053, 0.0061, 0.0052, 0.0055, 0.0069, 0.0074, 0.0073, 0.5823,
0.5251, 0.514, 0.4852, 0.5144, 0.4615, 0.4043, 0.3615, 0.3565,
0.3209, 0.3234, 0.3443, 0.3347, 0.357, 0.3025, 0.3309, 0.2778,
0.2551, 0.3197, 0.3299, 0.2679, 0.0098, 0.0069, 0.0098, 0.0086,
0.0073, 0.0106, 0.0058, 0.007, 0.0052, 0.0055, 0.0051, 0.0059,
0.0063, 0.0061, 0.0066, 0.0048, 0.0043, 0.0053)), row.names = c(NA,
60L), class = "data.frame")
对于每个年龄组,我想执行多项式回归并获得 beta 系数、SE、p 值、AIC、调整后的 R 平方值。
我做了以下操作,但我不知道如何获取 AIC 并将 R 平方值调整到回归循环并将它们保存在 P 值列旁边的新列中:
library(tidyverse)
library(broom)
poly1_all_ages <- data_model3 %>%
group_by(variable) %>%
do(tidy(lm(value ~ poly(Year, 1), .))) %>%
mutate(Beta = as.character(round(estimate, 6)), "P Value" = round(p.value, 6), SE = round(std.error, 6)) %>%
select(Beta, SE, "P Value") %>%
as.data.frame()
我得到的结果是:
variable Beta SE P Value
1 mortality_rate_all_ages 0.006562 0.000206 0.000000
2 mortality_rate_all_ages -0.002494 0.000944 0.016081
3 mortality_rate_under1y 0.379471 0.009440 0.000000
4 mortality_rate_under1y -0.381255 0.043261 0.000000
5 mortality_rate_1to10y 0.006717 0.000302 0.000000
6 mortality_rate_1to10y -0.00564 0.001281 0.000446
broom::tidy()
用于特定于预测变量的信息。您将需要 broom::glance()
来获取特定于模型的信息,例如 AIC。
对于每一级变量,如下代码
- 符合模型
- 使用
broom::tidy()
将与预测变量相关的信息提取为长格式
- 将 #2 长格式转换为宽格式,因此所有预测信息都在一行中
- 使用
broom::glance()
提取模型相关信息
- 列将预测变量和模型级别的信息绑定在一起
library(dplyr)
library(broom)
library(tidyr)
data_model3 %>%
group_by(variable) %>%
do({
my_mdl <- lm(value ~ poly(Year, 1), .)
my_tidy <- tidy(my_mdl)
my_tidy_wide <- pivot_wider(my_tidy, names_from = "term", values_from = everything())
my_glance <- glance(my_mdl)
bind_cols(my_tidy_wide, my_glance)
}) %>%
select(variable,
starts_with("estimate"),
starts_with("std.error"),
starts_with("p.value"),
AIC,
adj.r.squared)
我发现了很多类似的问题,但我没有找到可以帮助我解决以下情况的问题。提前致谢,抱歉重复。这是我的数据:
data_model3 <- structure(list(Year = c(1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017, 2018, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
2017, 2018, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("mortality_rate_all_ages",
"mortality_rate_under1y", "mortality_rate_1to10y", "mortality_rate_10to20y",
"mortality_rate_20to30y", "mortality_rate_30to40y", "mortality_rate_40to50y",
"mortality_rate_50to60y", "mortality_rate_60to70y", "mortality_rate_70to80y",
"mortality_rate_80to90y", "mortality_rate_above90y"), class = "factor"),
value = c(0.0088, 0.0077, 0.0082, 0.0075, 0.0076, 0.0075,
0.0066, 0.0061, 0.0059, 0.0054, 0.0054, 0.0058, 0.0056, 0.006,
0.0053, 0.0061, 0.0052, 0.0055, 0.0069, 0.0074, 0.0073, 0.5823,
0.5251, 0.514, 0.4852, 0.5144, 0.4615, 0.4043, 0.3615, 0.3565,
0.3209, 0.3234, 0.3443, 0.3347, 0.357, 0.3025, 0.3309, 0.2778,
0.2551, 0.3197, 0.3299, 0.2679, 0.0098, 0.0069, 0.0098, 0.0086,
0.0073, 0.0106, 0.0058, 0.007, 0.0052, 0.0055, 0.0051, 0.0059,
0.0063, 0.0061, 0.0066, 0.0048, 0.0043, 0.0053)), row.names = c(NA,
60L), class = "data.frame")
对于每个年龄组,我想执行多项式回归并获得 beta 系数、SE、p 值、AIC、调整后的 R 平方值。
我做了以下操作,但我不知道如何获取 AIC 并将 R 平方值调整到回归循环并将它们保存在 P 值列旁边的新列中:
library(tidyverse)
library(broom)
poly1_all_ages <- data_model3 %>%
group_by(variable) %>%
do(tidy(lm(value ~ poly(Year, 1), .))) %>%
mutate(Beta = as.character(round(estimate, 6)), "P Value" = round(p.value, 6), SE = round(std.error, 6)) %>%
select(Beta, SE, "P Value") %>%
as.data.frame()
我得到的结果是:
variable Beta SE P Value
1 mortality_rate_all_ages 0.006562 0.000206 0.000000
2 mortality_rate_all_ages -0.002494 0.000944 0.016081
3 mortality_rate_under1y 0.379471 0.009440 0.000000
4 mortality_rate_under1y -0.381255 0.043261 0.000000
5 mortality_rate_1to10y 0.006717 0.000302 0.000000
6 mortality_rate_1to10y -0.00564 0.001281 0.000446
broom::tidy()
用于特定于预测变量的信息。您将需要 broom::glance()
来获取特定于模型的信息,例如 AIC。
对于每一级变量,如下代码
- 符合模型
- 使用
broom::tidy()
将与预测变量相关的信息提取为长格式
- 将 #2 长格式转换为宽格式,因此所有预测信息都在一行中
- 使用
broom::glance()
提取模型相关信息
- 列将预测变量和模型级别的信息绑定在一起
library(dplyr)
library(broom)
library(tidyr)
data_model3 %>%
group_by(variable) %>%
do({
my_mdl <- lm(value ~ poly(Year, 1), .)
my_tidy <- tidy(my_mdl)
my_tidy_wide <- pivot_wider(my_tidy, names_from = "term", values_from = everything())
my_glance <- glance(my_mdl)
bind_cols(my_tidy_wide, my_glance)
}) %>%
select(variable,
starts_with("estimate"),
starts_with("std.error"),
starts_with("p.value"),
AIC,
adj.r.squared)