r中具有不同变量条件的循环线性模型
loop linear model with different variables condition in r
我想为线性模型做一个循环但是遇到了问题。
首先,我写了一个循环(不能运行)来提取我想要的 beta 值。
y <- c('y1', 'y2')
x1 <- c('a1', 'a2', 'a3')
x2 <- c('A', 'B')
for (y in y) {
for (x1 in x1) {
for (x2 in x2) {
m <- lm(as.name(y) ~ as.name(x1) + as.name(x2), data = dat) %>% summary() %>% .$coefficients %>% .[2,1]
}
}
}
for循环中y
、x1
、x2
的组合不是我所期望的。 lm
模型中的公式如下:expand.grid(y, x1, x2)
。而我所期望的是自变量位置上相同字母的所有组合。这是我的原始代码:
m1 <- lm(y1 ~ a1 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m2 <- lm(y1 ~ a2 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m3 <- lm(y1 ~ a3 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m4 <- lm(y2 ~ a1 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m5 <- lm(y2 ~ a2 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m6 <- lm(y2 ~ a3 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m7 <- lm(y1 ~ b1 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m8 <- lm(y1 ~ b2 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m9 <- lm(y1 ~ b3 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m10 <- lm(y2 ~ b1 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m11 <- lm(y2 ~ b2 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m12 <- lm(y2 ~ b3 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]```
这是我的数据。
非常感谢任何帮助!
dat <- structure(list(y1 = c(0.838141931, 0.174850172, 0.116144283,
0.113778511, 0.494270733, 0.874482265, 0.325621743, 0.661045636,
0.452396096), y2 = c(0.487877797, 0.360726955, 0.380614137, 0.169760207,
0.359371965, 0.743837108, 0.156535906, 0.995989192, 0.331058618
), a1 = c(0.336537159, 0.446060609, 0.57586382, 0.09629329, 0.491634112,
0.988226873, 0.929105257, 0.605957031, 0.470720774), a2 = c(0.128615421,
0.831986313, 0.267777151, 0.313178319, 0.7776461, 0.863337292,
0.042818986, 0.830029959, 0.901586271), a3 = c(0.291053766, 0.546719865,
0.918797744, 0.976353885, 0.193777436, 0.953859399, 0.963312236,
0.191449484, 0.825034161), b1 = c(0.31510338, 0.5441007, 0.515466925,
0.030702511, 0.020932599, 0.334734486, 0.586588252, 0.562970761,
0.848337089), b2 = c(0.426787995, 0.350719803, 0.706471337, 0.346462166,
0.099766511, 0.219781154, 0.565047862, 0.50282167, 0.727813725
), b3 = c(0.799666435, 0.07225825, 0.409411895, 0.701122141,
0.529991257, 0.478439097, 0.79467065, 0.442156618, 0.026693511
), A = c(0.43143391, 0.662313075, 0.584967093, 0.866110621, 0.598682492,
0.14665666, 0.454315631, 0.448968611, 0.238969939), B = c(0.060625179,
0.410312393, 0.614411256, 0.127343899, 0.90370096, 0.882024428,
0.681389602, 0.56535592, 0.850829599)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), spec = structure(list(
cols = list(y1 = structure(list(), class = c("collector_double",
"collector")), y2 = structure(list(), class = c("collector_double",
"collector")), a1 = structure(list(), class = c("collector_double",
"collector")), a2 = structure(list(), class = c("collector_double",
"collector")), a3 = structure(list(), class = c("collector_double",
"collector")), b1 = structure(list(), class = c("collector_double",
"collector")), b2 = structure(list(), class = c("collector_double",
"collector")), b3 = structure(list(), class = c("collector_double",
"collector")), A = structure(list(), class = c("collector_double",
"collector")), B = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
我不确定我是否理解正确,但您似乎以错误的方式组织了数据。也许这就是您想要的:
new_dat = data.frame(
y = c(rep(dat$y1, 3 + 3), rep(dat$y2, 3 + 3)),
x = c(rep(c(dat$a1, dat$a2, dat$a3), 2),
rep(c(dat$b1, dat$b2, dat$b3), 2)),
z = c(rep(dat$A, 3 + 3), rep(dat$B, 3 + 3))
)
models = with(new_dat, mapply(function(y, x, z) model = lm(y ~ x + z),
y, x, z, SIMPLIFY = FALSE))
如果这正是您要找的,请告诉我们。如果没有,请详细说明...
使用 expand.grid
您可以创建 y
、x1
和 x2
的所有组合。使用 sprintf
.
将公式创建为字符串
df <- expand.grid(y, x1, x2)
formula_strings <- sprintf('%s ~ %s + %s', df$Var1, df$Var2, df$Var3)
formula_strings
# [1] "y1 ~ a1 + A" "y2 ~ a1 + A" "y1 ~ a2 + A" "y2 ~ a2 + A"
# [5] "y1 ~ a3 + A" "y2 ~ a3 + A" "y1 ~ a1 + B" "y2 ~ a1 + B"
# [9] "y1 ~ a2 + B" "y2 ~ a2 + B" "y1 ~ a3 + B" "y2 ~ a3 + B"
使用 sapply
应用模型并从每个模型中提取系数。
values <- sapply(formula_strings, function(x)
lm(x, dat) %>% summary() %>% .$coefficients %>% .[2,1])
values
#y1 ~ a1 + A y2 ~ a1 + A y1 ~ a2 + A y2 ~ a2 + A y1 ~ a3 + A y2 ~ a3 + A
# -0.254943 -0.005956 -0.006177 0.286670 -0.393284 -0.387501
#y1 ~ a1 + B y2 ~ a1 + B y1 ~ a2 + B y2 ~ a2 + B y1 ~ a3 + B y2 ~ a3 + B
# 0.506848 0.371615 0.182370 0.435684 -0.370723 -0.380668
如果您需要:y ~ b[1-3] + B 和 y ~ a[1-3] + A 而不需要 y ~ a[1-3] + B 和 y ~ b[ 1-3] + A,那么你可以这样设置data.frame:
library(dplyr)
res = rbind(
expand.grid(response=c('y1', 'y2'),pre1=c('a1', 'a2', 'a3'),pre2='A',stringsAsFactors = FALSE),
expand.grid(response=c('y1', 'y2'),pre1=c('b1', 'b2', 'b3'),pre2='B',stringsAsFactors = FALSE)
)
从你的部分代码来看,你似乎只需要第二个系数,所以一个简单的基本 R 方法将使用 reformulate
为每一行构造公式:
res$coef = sapply(1:nrow(res),function(i){
predictor = as.character(res[i,c("pre1","pre2")])
response = res$response[i]
f = reformulate(predictor,response=response)
coefficients(lm(f,data=dat))[2]
})
head(res)
response pre1 pre2 coef
1 y1 a1 A -0.254942513
2 y2 a1 A -0.005955600
3 y1 a2 A -0.006177156
4 y2 a2 A 0.286669786
5 y1 a3 A -0.393284033
6 y2 a3 A -0.387500900
一个整洁的解决方案是这样的,我们首先嵌套模型
library(purrr)
library(broom)
library(tidyr)
res = rbind(
expand.grid(response=c('y1', 'y2'),pre1=c('a1', 'a2', 'a3'),pre2='A',stringsAsFactors = FALSE),
expand.grid(response=c('y1', 'y2'),pre1=c('b1', 'b2', 'b3'),pre2='B',stringsAsFactors = FALSE)
)
res = res %>%
mutate(model=1:n()) %>%
nest(param=c(c(response, pre1, pre2))) %>%
mutate(
fit = map(param,~
lm(reformulate(c(.$pre1,.$pre2),response=.$response),data=dat)),
tidied=map(fit,tidy)
)
模型和结果嵌套在小标题中:
res
# A tibble: 12 x 4
model param fit tidied
<int> <list> <list> <list>
1 1 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
2 2 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
3 3 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
4 4 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
如果你需要第二个系数:
res %>% unnest(c(param,tidied)) %>% filter(term==pre1)
# A tibble: 12 x 10
model response pre1 pre2 fit term estimate std.error statistic p.value
<int> <chr> <chr> <chr> <list> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 y1 a1 A <lm> a1 -0.255 0.390 -0.653 0.538
2 2 y2 a1 A <lm> a1 -0.00596 0.479 -0.0124 0.990
3 3 y1 a2 A <lm> a2 -0.00618 0.246 -0.0251 0.981
4 4 y2 a2 A <lm> a2 0.287 0.268 1.07 0.325
5 5 y1 a3 A <lm> a3 -0.393 0.176 -2.24 0.0664
6 6 y2 a3 A <lm> a3 -0.388 0.233 -1.66 0.148
7 7 y1 a1 B <lm> a1 0.507 0.541 0.937 0.385
8 8 y2 a1 B <lm> a1 0.372 0.510 0.729 0.493
9 9 y1 a2 B <lm> a2 0.182 0.391 0.466 0.657
10 10 y2 a2 B <lm> a2 0.436 0.319 1.37 0.221
11 11 y1 a3 B <lm> a3 -0.371 0.310 -1.19 0.277
12 12 y2 a3 B <lm> a3 -0.381 0.276 -1.38 0.217
我想为线性模型做一个循环但是遇到了问题。
首先,我写了一个循环(不能运行)来提取我想要的 beta 值。
y <- c('y1', 'y2')
x1 <- c('a1', 'a2', 'a3')
x2 <- c('A', 'B')
for (y in y) {
for (x1 in x1) {
for (x2 in x2) {
m <- lm(as.name(y) ~ as.name(x1) + as.name(x2), data = dat) %>% summary() %>% .$coefficients %>% .[2,1]
}
}
}
for循环中y
、x1
、x2
的组合不是我所期望的。 lm
模型中的公式如下:expand.grid(y, x1, x2)
。而我所期望的是自变量位置上相同字母的所有组合。这是我的原始代码:
m1 <- lm(y1 ~ a1 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m2 <- lm(y1 ~ a2 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m3 <- lm(y1 ~ a3 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m4 <- lm(y2 ~ a1 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m5 <- lm(y2 ~ a2 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m6 <- lm(y2 ~ a3 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m7 <- lm(y1 ~ b1 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m8 <- lm(y1 ~ b2 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m9 <- lm(y1 ~ b3 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m10 <- lm(y2 ~ b1 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m11 <- lm(y2 ~ b2 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m12 <- lm(y2 ~ b3 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]```
这是我的数据。
非常感谢任何帮助!
dat <- structure(list(y1 = c(0.838141931, 0.174850172, 0.116144283,
0.113778511, 0.494270733, 0.874482265, 0.325621743, 0.661045636,
0.452396096), y2 = c(0.487877797, 0.360726955, 0.380614137, 0.169760207,
0.359371965, 0.743837108, 0.156535906, 0.995989192, 0.331058618
), a1 = c(0.336537159, 0.446060609, 0.57586382, 0.09629329, 0.491634112,
0.988226873, 0.929105257, 0.605957031, 0.470720774), a2 = c(0.128615421,
0.831986313, 0.267777151, 0.313178319, 0.7776461, 0.863337292,
0.042818986, 0.830029959, 0.901586271), a3 = c(0.291053766, 0.546719865,
0.918797744, 0.976353885, 0.193777436, 0.953859399, 0.963312236,
0.191449484, 0.825034161), b1 = c(0.31510338, 0.5441007, 0.515466925,
0.030702511, 0.020932599, 0.334734486, 0.586588252, 0.562970761,
0.848337089), b2 = c(0.426787995, 0.350719803, 0.706471337, 0.346462166,
0.099766511, 0.219781154, 0.565047862, 0.50282167, 0.727813725
), b3 = c(0.799666435, 0.07225825, 0.409411895, 0.701122141,
0.529991257, 0.478439097, 0.79467065, 0.442156618, 0.026693511
), A = c(0.43143391, 0.662313075, 0.584967093, 0.866110621, 0.598682492,
0.14665666, 0.454315631, 0.448968611, 0.238969939), B = c(0.060625179,
0.410312393, 0.614411256, 0.127343899, 0.90370096, 0.882024428,
0.681389602, 0.56535592, 0.850829599)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), spec = structure(list(
cols = list(y1 = structure(list(), class = c("collector_double",
"collector")), y2 = structure(list(), class = c("collector_double",
"collector")), a1 = structure(list(), class = c("collector_double",
"collector")), a2 = structure(list(), class = c("collector_double",
"collector")), a3 = structure(list(), class = c("collector_double",
"collector")), b1 = structure(list(), class = c("collector_double",
"collector")), b2 = structure(list(), class = c("collector_double",
"collector")), b3 = structure(list(), class = c("collector_double",
"collector")), A = structure(list(), class = c("collector_double",
"collector")), B = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
我不确定我是否理解正确,但您似乎以错误的方式组织了数据。也许这就是您想要的:
new_dat = data.frame(
y = c(rep(dat$y1, 3 + 3), rep(dat$y2, 3 + 3)),
x = c(rep(c(dat$a1, dat$a2, dat$a3), 2),
rep(c(dat$b1, dat$b2, dat$b3), 2)),
z = c(rep(dat$A, 3 + 3), rep(dat$B, 3 + 3))
)
models = with(new_dat, mapply(function(y, x, z) model = lm(y ~ x + z),
y, x, z, SIMPLIFY = FALSE))
如果这正是您要找的,请告诉我们。如果没有,请详细说明...
使用 expand.grid
您可以创建 y
、x1
和 x2
的所有组合。使用 sprintf
.
df <- expand.grid(y, x1, x2)
formula_strings <- sprintf('%s ~ %s + %s', df$Var1, df$Var2, df$Var3)
formula_strings
# [1] "y1 ~ a1 + A" "y2 ~ a1 + A" "y1 ~ a2 + A" "y2 ~ a2 + A"
# [5] "y1 ~ a3 + A" "y2 ~ a3 + A" "y1 ~ a1 + B" "y2 ~ a1 + B"
# [9] "y1 ~ a2 + B" "y2 ~ a2 + B" "y1 ~ a3 + B" "y2 ~ a3 + B"
使用 sapply
应用模型并从每个模型中提取系数。
values <- sapply(formula_strings, function(x)
lm(x, dat) %>% summary() %>% .$coefficients %>% .[2,1])
values
#y1 ~ a1 + A y2 ~ a1 + A y1 ~ a2 + A y2 ~ a2 + A y1 ~ a3 + A y2 ~ a3 + A
# -0.254943 -0.005956 -0.006177 0.286670 -0.393284 -0.387501
#y1 ~ a1 + B y2 ~ a1 + B y1 ~ a2 + B y2 ~ a2 + B y1 ~ a3 + B y2 ~ a3 + B
# 0.506848 0.371615 0.182370 0.435684 -0.370723 -0.380668
如果您需要:y ~ b[1-3] + B 和 y ~ a[1-3] + A 而不需要 y ~ a[1-3] + B 和 y ~ b[ 1-3] + A,那么你可以这样设置data.frame:
library(dplyr)
res = rbind(
expand.grid(response=c('y1', 'y2'),pre1=c('a1', 'a2', 'a3'),pre2='A',stringsAsFactors = FALSE),
expand.grid(response=c('y1', 'y2'),pre1=c('b1', 'b2', 'b3'),pre2='B',stringsAsFactors = FALSE)
)
从你的部分代码来看,你似乎只需要第二个系数,所以一个简单的基本 R 方法将使用 reformulate
为每一行构造公式:
res$coef = sapply(1:nrow(res),function(i){
predictor = as.character(res[i,c("pre1","pre2")])
response = res$response[i]
f = reformulate(predictor,response=response)
coefficients(lm(f,data=dat))[2]
})
head(res)
response pre1 pre2 coef
1 y1 a1 A -0.254942513
2 y2 a1 A -0.005955600
3 y1 a2 A -0.006177156
4 y2 a2 A 0.286669786
5 y1 a3 A -0.393284033
6 y2 a3 A -0.387500900
一个整洁的解决方案是这样的,我们首先嵌套模型
library(purrr)
library(broom)
library(tidyr)
res = rbind(
expand.grid(response=c('y1', 'y2'),pre1=c('a1', 'a2', 'a3'),pre2='A',stringsAsFactors = FALSE),
expand.grid(response=c('y1', 'y2'),pre1=c('b1', 'b2', 'b3'),pre2='B',stringsAsFactors = FALSE)
)
res = res %>%
mutate(model=1:n()) %>%
nest(param=c(c(response, pre1, pre2))) %>%
mutate(
fit = map(param,~
lm(reformulate(c(.$pre1,.$pre2),response=.$response),data=dat)),
tidied=map(fit,tidy)
)
模型和结果嵌套在小标题中:
res
# A tibble: 12 x 4
model param fit tidied
<int> <list> <list> <list>
1 1 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
2 2 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
3 3 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
4 4 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
如果你需要第二个系数:
res %>% unnest(c(param,tidied)) %>% filter(term==pre1)
# A tibble: 12 x 10
model response pre1 pre2 fit term estimate std.error statistic p.value
<int> <chr> <chr> <chr> <list> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 y1 a1 A <lm> a1 -0.255 0.390 -0.653 0.538
2 2 y2 a1 A <lm> a1 -0.00596 0.479 -0.0124 0.990
3 3 y1 a2 A <lm> a2 -0.00618 0.246 -0.0251 0.981
4 4 y2 a2 A <lm> a2 0.287 0.268 1.07 0.325
5 5 y1 a3 A <lm> a3 -0.393 0.176 -2.24 0.0664
6 6 y2 a3 A <lm> a3 -0.388 0.233 -1.66 0.148
7 7 y1 a1 B <lm> a1 0.507 0.541 0.937 0.385
8 8 y2 a1 B <lm> a1 0.372 0.510 0.729 0.493
9 9 y1 a2 B <lm> a2 0.182 0.391 0.466 0.657
10 10 y2 a2 B <lm> a2 0.436 0.319 1.37 0.221
11 11 y1 a3 B <lm> a3 -0.371 0.310 -1.19 0.277
12 12 y2 a3 B <lm> a3 -0.381 0.276 -1.38 0.217