制作用于匹配多个因变量的函数,purrr
Making a function for matching on multiple dependent variables, purrr
我想使用 Matching
包对多个因变量估计匹配处理效果。
对于单个因变量,我可以 运行 下面 returns 我想要的:
library(carData)
library(purrr)
library(tidyverse)
library(Matching)
matching_df <- Mroz %>%
mutate(wc = case_when(wc == "yes" ~ "TRUE",
wc == "no" ~ "FALSE")) %>%
drop_na(k5, k618, age, wc, hc, lfp)
matching_df$wc <- as.logical(matching_df$wc)
ps1 <- glm(wc ~ k5 + k618 + age + hc,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df$lfp
Tr <- as.logical(matching_df$wc)
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
summary(psm1)
Estimate... 0.17479
SE......... 0.044963
T-stat..... 3.8873
p.val...... 0.00010135
Original number of observations.............. 753
Original number of treated obs............... 212
Matched number of observations............... 207
Matched number of observations (unweighted). 1074
Caliper (SDs)........................................ 0.05
Number of obs dropped by 'exact' or 'caliper' 5
但是当我尝试使用 purrr:map_dfr
创建一个函数以便我可以对多个因变量重复此操作时,它 returns 出错了。这是我对函数的尝试:
vars <- c("lfp", "lwg", "inc")
names(vars) <- vars
matching_fcn <- function(.x){
matching_df <- Mroz %>%
mutate(wc = case_when(wc == "yes" ~ "TRUE",
wc == "no" ~ "FALSE")) %>%
drop_na(k5, k618, age, wc, hc, .x)
matching_df$wc <- as.logical(matching_df$wc)
ps1 <- glm(wc ~ k5 + k618 + age + hc,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df$.x
Tr <- as.logical(matching_df$wc)
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
summary(psm1)
}
purrr::map_dfr(
.x = all_of(vars),
.f = matching_fcn)
Error: All columns in a tibble must be vectors.
x Column `lfp` is a `summary.Match` object.
x Column `lwg` is a `summary.Match` object.
x Column `inc` is a `summary.Match` object.
Run `rlang::last_error()` to see where the error occurred.
最终,我想要一个 tibble,其中在一列中包含因变量的名称,然后是 Matching::Match 返回的估计值、se、T-stat 和 p.val在其他栏目中发挥作用
summary(psm1)
不能放在小标题中。因此,选择 psm1
的一些值并制作您自己的值。此外,drop_na
不是一个好主意,它会使您的结果产生偏差。
library(Matching)
vars <- c("dependent_var_1", "dependent_var_2", "dependent_var_3")
names(vars) <- vars
matching_fcn <- function(.x){
# matching_df <- matching_df %>%
# drop_na(covar_1, covar_2, covar_3, covar_4, covar_5, covar_6, covar_7, treat_1, .x)
ps1 <- glm(treat_1 ~ covar_1 + covar_2 + covar_3 + covar_4 + covar_5 + covar_6 + covar_7,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df[[.x]]
Tr <- matching_df$treat_1
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
p <- 1 - pnorm(abs(psm1$est.noadj/psm1$se.standard))
with(psm1, tibble(dv=.x, est=est.noadj, se=se.standard, p=p, ndrops=ndrops))
}
用法和结果
library(dplyr)
library(tidyr)
purrr::map_df(
.x = tidyselect::all_of(vars),
.f = matching_fcn)
# # A tibble: 3 × 5
# dv est se p ndrops
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 dependent_var_1 0.652 0.231 0.00238 8
# 2 dependent_var_2 -0.216 0.188 0.125 8
# 3 dependent_var_3 -0.506 0.249 0.0210 8
数据
v <- c('covar_1', 'covar_2', 'covar_3', 'covar_4', 'covar_5', 'covar_6',
'covar_7', 'treat_1', 'dependent_var_1', 'dependent_var_2', 'dependent_var_3')
set.seed(830595665)
matching_df <- data.frame(matrix(rnorm(100*length(v)), 100, length(v), dimnames=list(c(), v)))
matching_df$treat_1 <- +(matching_df$treat_1 > 0)
我想使用 Matching
包对多个因变量估计匹配处理效果。
对于单个因变量,我可以 运行 下面 returns 我想要的:
library(carData)
library(purrr)
library(tidyverse)
library(Matching)
matching_df <- Mroz %>%
mutate(wc = case_when(wc == "yes" ~ "TRUE",
wc == "no" ~ "FALSE")) %>%
drop_na(k5, k618, age, wc, hc, lfp)
matching_df$wc <- as.logical(matching_df$wc)
ps1 <- glm(wc ~ k5 + k618 + age + hc,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df$lfp
Tr <- as.logical(matching_df$wc)
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
summary(psm1)
Estimate... 0.17479
SE......... 0.044963
T-stat..... 3.8873
p.val...... 0.00010135
Original number of observations.............. 753
Original number of treated obs............... 212
Matched number of observations............... 207
Matched number of observations (unweighted). 1074
Caliper (SDs)........................................ 0.05
Number of obs dropped by 'exact' or 'caliper' 5
但是当我尝试使用 purrr:map_dfr
创建一个函数以便我可以对多个因变量重复此操作时,它 returns 出错了。这是我对函数的尝试:
vars <- c("lfp", "lwg", "inc")
names(vars) <- vars
matching_fcn <- function(.x){
matching_df <- Mroz %>%
mutate(wc = case_when(wc == "yes" ~ "TRUE",
wc == "no" ~ "FALSE")) %>%
drop_na(k5, k618, age, wc, hc, .x)
matching_df$wc <- as.logical(matching_df$wc)
ps1 <- glm(wc ~ k5 + k618 + age + hc,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df$.x
Tr <- as.logical(matching_df$wc)
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
summary(psm1)
}
purrr::map_dfr(
.x = all_of(vars),
.f = matching_fcn)
Error: All columns in a tibble must be vectors.
x Column `lfp` is a `summary.Match` object.
x Column `lwg` is a `summary.Match` object.
x Column `inc` is a `summary.Match` object.
Run `rlang::last_error()` to see where the error occurred.
最终,我想要一个 tibble,其中在一列中包含因变量的名称,然后是 Matching::Match 返回的估计值、se、T-stat 和 p.val在其他栏目中发挥作用
summary(psm1)
不能放在小标题中。因此,选择 psm1
的一些值并制作您自己的值。此外,drop_na
不是一个好主意,它会使您的结果产生偏差。
library(Matching)
vars <- c("dependent_var_1", "dependent_var_2", "dependent_var_3")
names(vars) <- vars
matching_fcn <- function(.x){
# matching_df <- matching_df %>%
# drop_na(covar_1, covar_2, covar_3, covar_4, covar_5, covar_6, covar_7, treat_1, .x)
ps1 <- glm(treat_1 ~ covar_1 + covar_2 + covar_3 + covar_4 + covar_5 + covar_6 + covar_7,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df[[.x]]
Tr <- matching_df$treat_1
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
p <- 1 - pnorm(abs(psm1$est.noadj/psm1$se.standard))
with(psm1, tibble(dv=.x, est=est.noadj, se=se.standard, p=p, ndrops=ndrops))
}
用法和结果
library(dplyr)
library(tidyr)
purrr::map_df(
.x = tidyselect::all_of(vars),
.f = matching_fcn)
# # A tibble: 3 × 5
# dv est se p ndrops
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 dependent_var_1 0.652 0.231 0.00238 8
# 2 dependent_var_2 -0.216 0.188 0.125 8
# 3 dependent_var_3 -0.506 0.249 0.0210 8
数据
v <- c('covar_1', 'covar_2', 'covar_3', 'covar_4', 'covar_5', 'covar_6',
'covar_7', 'treat_1', 'dependent_var_1', 'dependent_var_2', 'dependent_var_3')
set.seed(830595665)
matching_df <- data.frame(matrix(rnorm(100*length(v)), 100, length(v), dimnames=list(c(), v)))
matching_df$treat_1 <- +(matching_df$treat_1 > 0)