制作用于匹配多个因变量的函数,purrr

Making a function for matching on multiple dependent variables, purrr

我想使用 Matching 包对多个因变量估计匹配处理效果。

对于单个因变量,我可以 运行 下面 returns 我想要的:

library(carData)
library(purrr)
library(tidyverse)
library(Matching)

matching_df <- Mroz %>% 
  mutate(wc = case_when(wc == "yes" ~ "TRUE", 
                        wc == "no" ~ "FALSE")) %>% 
  drop_na(k5, k618, age, wc, hc, lfp)

matching_df$wc <- as.logical(matching_df$wc)

ps1 <- glm(wc ~ k5 + k618 + age + hc, 
           family = binomial, data = matching_df)

pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)

Y <- matching_df$lfp
Tr <- as.logical(matching_df$wc)
  
psm1 <- Matching::Match(
  Y = Y, 
  Tr = Tr, 
  X = pscore, 
  estimand = "ATT", 
  M = 1, 
  replace = TRUE, 
  caliper = 0.05, 
  version = "fast")

summary(psm1)

Estimate...  0.17479 
SE.........  0.044963 
T-stat.....  3.8873 
p.val......  0.00010135 

Original number of observations..............  753 
Original number of treated obs...............  212 
Matched number of observations...............  207 
Matched number of observations  (unweighted).  1074 

Caliper (SDs)........................................   0.05 
Number of obs dropped by 'exact' or 'caliper'  5 

但是当我尝试使用 purrr:map_dfr 创建一个函数以便我可以对多个因变量重复此操作时,它 returns 出错了。这是我对函数的尝试:

vars <- c("lfp", "lwg", "inc")
names(vars) <- vars

matching_fcn <- function(.x){

  matching_df <- Mroz %>% 
    mutate(wc = case_when(wc == "yes" ~ "TRUE", 
                          wc == "no" ~ "FALSE")) %>% 
    drop_na(k5, k618, age, wc, hc, .x)

  matching_df$wc <- as.logical(matching_df$wc)

  ps1 <- glm(wc ~ k5 + k618 + age + hc, 
             family = binomial, data = matching_df)

  pscore <- ps1$fitted.values
  matching_df <- cbind(matching_df, pscore)

  Y <- matching_df$.x
  Tr <- as.logical(matching_df$wc)
    
  psm1 <- Matching::Match(
    Y = Y, 
    Tr = Tr, 
    X = pscore, 
    estimand = "ATT", 
    M = 1, 
    replace = TRUE, 
    caliper = 0.05, 
    version = "fast")

  summary(psm1)

}

purrr::map_dfr(
  .x = all_of(vars),
  .f = matching_fcn)
Error: All columns in a tibble must be vectors.
x Column `lfp` is a `summary.Match` object.
x Column `lwg` is a `summary.Match` object.
x Column `inc` is a `summary.Match` object.
Run `rlang::last_error()` to see where the error occurred.

最终,我想要一个 tibble,其中在一列中包含因变量的名称,然后是 Matching::Match 返回的估计值、se、T-stat 和 p.val在其他栏目中发挥作用

summary(psm1) 不能放在小标题中。因此,选择 psm1 的一些值并制作您自己的值。此外,drop_na 不是一个好主意,它会使您的结果产生偏差。

library(Matching)
vars <- c("dependent_var_1", "dependent_var_2", "dependent_var_3")
names(vars) <- vars

matching_fcn <- function(.x){
  # matching_df <- matching_df %>% 
  #   drop_na(covar_1, covar_2, covar_3, covar_4, covar_5, covar_6, covar_7, treat_1, .x)
  
  ps1 <- glm(treat_1 ~ covar_1 + covar_2 + covar_3 + covar_4 + covar_5 + covar_6 + covar_7, 
             family = binomial, data = matching_df)
  
  pscore <- ps1$fitted.values
  matching_df <- cbind(matching_df, pscore)
  
  Y <- matching_df[[.x]]
  Tr <- matching_df$treat_1
  
  psm1 <- Matching::Match(
    Y = Y, 
    Tr = Tr, 
    X = pscore, 
    estimand = "ATT", 
    M = 1, 
    replace = TRUE, 
    caliper = 0.05, 
    version = "fast")
  p <- 1 - pnorm(abs(psm1$est.noadj/psm1$se.standard))
  with(psm1, tibble(dv=.x, est=est.noadj, se=se.standard, p=p, ndrops=ndrops))
}

用法和结果

library(dplyr)
library(tidyr)
purrr::map_df(
  .x =  tidyselect::all_of(vars),
  .f = matching_fcn)
# # A tibble: 3 × 5
#   dv                 est    se       p ndrops
#   <chr>            <dbl> <dbl>   <dbl>  <dbl>
# 1 dependent_var_1  0.652 0.231 0.00238      8
# 2 dependent_var_2 -0.216 0.188 0.125        8
# 3 dependent_var_3 -0.506 0.249 0.0210       8

数据

v <- c('covar_1', 'covar_2', 'covar_3', 'covar_4', 'covar_5', 'covar_6', 
       'covar_7', 'treat_1', 'dependent_var_1', 'dependent_var_2', 'dependent_var_3')
set.seed(830595665)
matching_df <- data.frame(matrix(rnorm(100*length(v)), 100, length(v), dimnames=list(c(), v)))
matching_df$treat_1 <- +(matching_df$treat_1 > 0)