按组获取标准偏差并从 R 中的平均列标准偏差中减去

Get a standard deviation by group and subtract from mean column standard deviation in R

我有一个玩具 df 包含在下面。对于 GradesAge 列,我需要通过 University[ 找出平均标准差与总标准差之间的差异=32=]。新的 table 中的值 - 应该有 4 行和 2 列 - 因此应该代表 GradesAge[= 的标准差差异每个 University 中的 32=] 与 GradesAges 的总 df 标准偏差(在所有大学)。

数据:

library(dplyr)
df <- tibble::tribble(
           ~University, ~Countries, ~Grades, ~Age,
        "University-1",      "USA",      46,    29,
        "University-1",       "UK",      84,    30,
        "University-1",   "Sweden",       5,    28,
        "University-1",    "Spain",      40,    26,
        "University-1", "Portugal",      49,    29,
        "University-1",    "Italy",      16,    24,
        "University-1",      "USA",      34,    19,
        "University-1",       "UK",      66,    28,
        "University-1",   "Sweden",       9,    25,
        "University-1",    "Spain",      80,    20,
        "University-1", "Portugal",      55,    20,
        "University-1",    "Italy",       4,    21,
        "University-1",      "USA",      93,    18,
        "University-1",       "UK",      62,    28,
        "University-1",   "Sweden",      80,    30,
        "University-2",    "Spain",       1,    22,
        "University-2", "Portugal",      56,    25,
        "University-2",    "Italy",       9,    29,
        "University-2",      "USA",      40,    21,
        "University-2",       "UK",      54,    20,
        "University-2",   "Sweden",      60,    24,
        "University-2",    "Spain",      77,    21,
        "University-2", "Portugal",      22,    18,
        "University-2",    "Italy",      53,    29,
        "University-2",      "USA",      11,    21,
        "University-2",       "UK",      65,    27,
        "University-2",   "Sweden",      24,    27,
        "University-2",    "Spain",      18,    23,
        "University-2", "Portugal",      73,    19,
        "University-2",    "Italy",      79,    22,
        "University-1",      "USA",       2,    26,
        "University-1",       "UK",      83,    23,
        "University-1",   "Sweden",       5,    19,
        "University-1",    "Spain",      75,    19,
        "University-1", "Portugal",      12,    21,
        "University-1",    "Italy",      68,    29,
        "University-1",      "USA",     100,    21,
        "University-1",       "UK",      49,    21,
        "University-1",   "Sweden",      81,    20,
        "University-1",    "Spain",      99,    23,
        "University-1", "Portugal",      82,    24,
        "University-1",    "Italy",      23,    26,
        "University-1",      "USA",      86,    30,
        "University-1",       "UK",      50,    20,
        "University-1",   "Sweden",       4,    19,
        "University-2",    "Spain",      12,    25,
        "University-2", "Portugal",      12,    21,
        "University-2",    "Italy",      45,    21,
        "University-2",      "USA",      16,    26,
        "University-2",       "UK",      56,    23,
        "University-2",   "Sweden",      63,    24,
        "University-2",    "Spain",      37,    28,
        "University-2", "Portugal",      86,    21,
        "University-2",    "Italy",      95,    18,
        "University-2",      "USA",      56,    20,
        "University-2",       "UK",      27,    20,
        "University-2",   "Sweden",       3,    27,
        "University-2",    "Spain",      18,    27,
        "University-3", "Portugal",      68,    27,
        "University-3",    "Italy",      48,    21,
        "University-3", "Portugal",      86,    21,
        "University-3",    "Italy",      95,    18,
        "University-3",      "USA",      56,    20,
        "University-3",       "UK",      27,    20,
        "University-3",   "Sweden",       3,    27,
        "University-3",    "Spain",      18,    27,
        "University-3", "Portugal",      68,    27,
        "University-3",    "Italy",      48,    21,
        "University-4", "Portugal",      86,    21,
        "University-4",    "Italy",      95,    18,
        "University-4",      "USA",      56,    20,
        "University-4",       "UK",      27,    20,
        "University-4",   "Sweden",       3,    27,
        "University-4",    "Spain",      18,    27,
        "University-4", "Portugal",      68,    27,
        "University-4",    "Italy",      48,    21
        )

我对此的尝试:

df <- df %>% 
    mutate(grades_sd = sd(Grades),
           age_sd = sd(Age)) %>% 
    group_by(University) %>%
    summarise(Grades_sd = sd(Grades) - grades_sd,
              Age_sd = sd(Age) - age_sd)

这段代码执行了正确的(我认为)计算,但以错误的格式输出了 table。我很感激任何关于这方面的指导。

编辑:包括下面的预期输出。

output <- tibble::tribble(
           ~University, ~Grades_sd, ~Age_sd,
        "University-1", 2.666482, 0.40233934,   
        "University-2", -2.790652, -0.34945170, 
        "University-3", 0.881169, -0.03754330,
        "University-4", 2.398070, 0.06701784)   

由于每个组都有相同的记录,summarise 重复它们。取唯一值将 return 只有一个,并且每个组将有一个摘要行。

df %>% 
    mutate(grades_sd = sd(Grades),
           age_sd = sd(Age)) %>% 
    group_by(University) %>%
    summarise(Grades_sd = sd(Grades) - unique(grades_sd),
              Age_sd = sd(Age) - unique(age_sd))

输出;

  University   Grades_sd  Age_sd
  <chr>            <dbl>   <dbl>
1 University-1     2.67   0.402 
2 University-2    -2.79  -0.349 
3 University-3    -0.881 -0.0375
4 University-4     2.40   0.0670

我们可以使用 df 的未分组版本。

library(dplyr)

df %>% 
    group_by(University) %>%
    summarise(Grades_sd = sd(Grades) - sd(df$Grades),
              Age_sd    = sd(Age) - sd(df$Age))
#> # A tibble: 4 × 3
#>   University   Grades_sd  Age_sd
#>   <chr>            <dbl>   <dbl>
#> 1 University-1     2.67   0.402 
#> 2 University-2    -2.79  -0.349 
#> 3 University-3    -0.881 -0.0375
#> 4 University-4     2.40   0.0670

reprex package (v2.0.1)

创建于 2022-01-06