在 R 中有效地报告 t.test 值

report t.test value efficiently in R

我有 2 个不同的组,即 am.0 和 am.1。我想创建一个数据框,显示每个组的 mean (sd),以及两组均值之差 diff (SD),并用 t.test 的显著性符号(例如 ***)进行标注。非常感谢。

# Treat the transmission indicator as a categorical grouping variable.
mtcars$am <- factor(mtcars$am)

# Per-group mean and standard deviation of mpg.
my.mtcars <- dplyr::summarise(
  dplyr::group_by(mtcars, am),
  mean = mean(mpg),
  sd = sd(mpg)
)
my.mtcars

# Replace the numeric sd column with its rounded value wrapped in parentheses.
my.mtcars$sd <- paste0("(", round(my.mtcars$sd, 2), ")")
my.mtcars
     

预期答案是一个数据框

        am.0            am.1      diff (SD)
mpg    17.1 (3.83)  24.4 (6.17)   17.1-24.4 (SD)**
disp   ...            ...          ...
gear   ...            ...          ...
    

其中SD为两个独立样本之差的标准误,

$$SD = \sqrt{\frac{s_1^2}{n_1} + \frac{s_2^2}{n_2}}$$

这是我比较粗糙的尝试:

#' Round only the fractional part of a number to `digits` significant digits
#'
#' The integer part (`floor(x)`) is kept untouched; the remainder `x %% 1` is
#' passed through `signif()` and added back.
#'
#' @param x Numeric vector.
#' @param digits Number of significant digits to keep in the fractional part.
#' @return Numeric vector of the same length as `x`.
my_signif <- function(x, digits) {
  whole_part <- floor(x)
  fractional_part <- x %% 1
  whole_part + signif(fractional_part, digits)
}
#' One-row summary of a two-group t-test for a single variable
#'
#' Splits the column `x` of `df` by the two groups found in column `y`, runs
#' `t.test()` on the two groups (default settings, i.e. Welch's test) and
#' returns a one-row data frame holding "mean (sd)" for each group and the
#' difference of the two displayed means followed by significance stars
#' (`***` for p < 0.001, `**` for p < 0.01, `*` for p < 0.05, empty otherwise).
#'
#' Rows where either `x` or `y` is missing are dropped before anything is
#' computed, so the reported SDs refer to the same observations as the test.
#'
#' NOTE: the name looks like an S3 method of `t.test()` for class "df"; it is
#' kept unchanged for existing callers.
#'
#' @param x Name of the column to summarise (single string).
#' @param y Name of the grouping column (single string). It need not be a
#'   factor; it is coerced with `factor()`.
#' @param df Data frame containing both columns.
#' @return A data frame with one row (row name `x`) and three columns named
#'   `<y><first level>`, `<y><second level>` and `diff`.
t.test.df <- function(x, y, df) {
  stopifnot(
    is.character(x), length(x) == 1L,
    is.character(y), length(y) == 1L,
    is.data.frame(df)
  )
  absent <- setdiff(c(x, y), names(df))
  if (length(absent) > 0L) {
    stop(
      "column(s) not found in `df`: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Look the columns up by name instead of eval(parse(...)): the parsed
  # expression was evaluated inside `df`, so columns literally called "x" or
  # "y" shadowed the function arguments.
  values <- df[[x]]
  groups <- factor(df[[y]])

  # Keep complete cases only and drop levels that no longer occur, so that
  # labels, SDs and the test all refer to the same two groups.
  keep <- !is.na(values) & !is.na(groups)
  values <- values[keep]
  groups <- droplevels(groups[keep])
  group_names <- levels(groups)
  if (length(group_names) != 2L) {
    stop(
      "`", y, "` must define exactly 2 groups, found ", length(group_names),
      call. = FALSE
    )
  }

  by_group <- split(values, groups)
  test <- t.test(by_group[[1L]], by_group[[2L]])

  # Map the p-value onto stars; interval bounds are closed on the left, which
  # matches the strict `<` comparisons described above.
  stars <- c("***", "**", "*", "")
  sig <- stars[findInterval(test$p.value, c(0.001, 0.01, 0.05)) + 1L]

  est <- my_signif(unname(test$estimate), 2)
  sds <- my_signif(vapply(by_group, sd, numeric(1), USE.NAMES = FALSE), 2)

  out <- data.frame(
    a = paste0(est[1L], " (", sds[1L], ")"),
    b = paste0(est[2L], " (", sds[2L], ")"),
    # Difference of the displayed (rounded) means, so the row is consistent.
    c = paste0(est[1L] - est[2L], " (", sig, ")"),
    row.names = x
  )
  colnames(out) <- c(paste0(y, group_names), "diff")
  out
}
# Example: one-row summary of mpg split by the am column of mtcars.
t.test.df("mpg","am",mtcars)

输出:

> t.test.df("mpg","am",mtcars)
             am0          am1       diff
mpg 17.15 (3.83) 24.39 (6.17) -7.24 (**)

进一步:

#' Stack `t.test.df()` summaries for several variables
#'
#' Calls `t.test.df()` once per entry of `cols`, always with the same grouping
#' column `y` and data frame `df`, and row-binds the results.
#'
#' @param cols Names of the columns to summarise.
#' @param y Name of the grouping column, passed on to `t.test.df()`.
#' @param df Data frame, passed on to `t.test.df()`.
#' @return The row-bound results of the individual `t.test.df()` calls.
t.test.df2 <- function(cols, y, df) {
  summarise_column <- function(column) {
    t.test.df(column, y, df)
  }
  per_column <- lapply(cols, summarise_column)
  do.call(rbind, per_column)
}

输出:

# Example: summarise several mtcars columns at once, all split by am.
cols = c("mpg", "disp", "gear")
t.test.df2(cols,"am",mtcars)

                 am0           am1         diff
mpg     17.15 (3.83)  24.39 (6.17)   -7.24 (**)
disp 290.38 (110.17) 143.53 (87.2) 146.85 (***)
gear     3.21 (0.42)   4.38 (0.51)  -1.17 (***)