purrr -- 循环遍历符合特定命名模式的所有变量以运行相关分析
purrr -- looping all variables within a specific pattern to run correlational analyses
假设我有这样的数据集:
# Six integer columns (question1..3 followed by their `_a` counterparts);
# each column is an independent random permutation of 1..10, drawn in
# column order.
ds <- as.data.frame(
  setNames(
    replicate(6L, sample.int(10L), simplify = FALSE),
    c(paste0("question", 1:3), paste0("question", 1:3, "_a"))
  )
)
我想分别计算 question1 与 question1_a、question2 与 question2_a、question3 与 question3_a 之间的组内相关系数(ICC)。[配对模式为:questionN 与 questionN_a]。
我目前的做法是:
# Attach the packages used by this snippet.
library(psych)
library(tidyverse)
# One ICC() call per pair, written out by hand: each call receives a
# two-column matrix built from a question column and its `_a` counterpart.
ICC(cbind(ds$question1, ds$question1_a))
ICC(cbind(ds$question2, ds$question2_a))
我很确定可以用 tidyverse 的函数(purrr / map)一次完成所有 ICC 分析,但我不知道该如何实现。
感谢您的帮助。
完整代码:
library(psych)
library(tidyverse)
# Six integer columns (question1..3 followed by their `_a` counterparts);
# each column is an independent random permutation of 1..10, drawn in
# column order.
ds <- as.data.frame(
  setNames(
    replicate(6L, sample.int(10L), simplify = FALSE),
    c(paste0("question", 1:3), paste0("question", 1:3, "_a"))
  )
)
# Manual, pair-by-pair ICC calls (the repetition the question wants to avoid).
ICC(cbind(ds$question1, ds$question1_a))
ICC(cbind(ds$question2, ds$question2_a))
我们可以这样实现:
set.seed(233)
library(dplyr)
# There might be a better way than selecting twice.
# Columns are paired by position, so this assumes `ds` lists the plain
# question columns and the `_a` columns in the same order.
# The backslash is doubled because "\d" is not a valid escape in an R string
# literal; "\\d" passes the regex `\d` (a digit) to matches().
df1 <- ds %>%
  select(matches(".*\\d$"))
df2 <- ds %>%
  select(contains("_"))
# Returns a list named after the columns of df1, one ICC result per pair.
Map(function(x, y) psych::ICC(cbind(x, y)), df1, df2)
或者仅使用 base R。我注意到,由于重复取子集并多次调用 cbind,这种写法的计算开销可能较大。
# Base-R variant: pick the columns whose names end in a digit and the columns
# whose names contain "_", then pair them by position.
# The backslash is doubled because "\d" is not a valid escape in an R string
# literal; "\\d" passes the regex `\d` (a digit) to grep().
Map(function(x, y) psych::ICC(cbind(x, y)),
    ds[grep(".*\\d$", names(ds))],
    ds[grep("_", names(ds))])
示例输出:
$question1
Call: psych::ICC(x = cbind(x, y))
Intraclass correlation coefficients
type ICC F df1 df2 p lower bound upper bound
Single_raters_absolute ICC1 0.24 1.6 9 10 0.23 -0.40 0.73
Single_random_raters ICC2 0.24 1.6 9 9 0.24 -0.41 0.73
Single_fixed_raters ICC3 0.24 1.6 9 9 0.24 -0.42 0.73
Average_raters_absolute ICC1k 0.38 1.6 9 10 0.23 -1.33 0.84
Average_random_raters ICC2k 0.38 1.6 9 9 0.24 -1.38 0.85
Average_fixed_raters ICC3k 0.38 1.6 9 9 0.24 -1.48 0.85
Number of subjects = 10 Number of Judges = 2
$question2
Call: psych::ICC(x = cbind(x, y))
数据(种子设置为233):
# Fixed copy of the example data: six integer columns over ten rows with
# automatic row names.
ds <- data.frame(
  question1   = c(6L, 8L, 5L, 9L, 3L, 10L, 4L, 2L, 7L, 1L),
  question2   = c(7L, 2L, 3L, 10L, 8L, 9L, 6L, 5L, 1L, 4L),
  question3   = c(3L, 1L, 2L, 9L, 6L, 8L, 7L, 5L, 10L, 4L),
  question1_a = c(4L, 6L, 2L, 8L, 10L, 9L, 5L, 7L, 1L, 3L),
  question2_a = c(9L, 1L, 10L, 5L, 4L, 2L, 8L, 7L, 3L, 6L),
  question3_a = c(9L, 4L, 3L, 6L, 1L, 5L, 10L, 2L, 8L, 7L)
)
下面是一个使用 purrr 且无需创建中间数据框的解决方案:
library(psych)
library(tidyverse)
# Example data: six columns, each an independent random permutation of 1..10.
ds <- data.frame(
  question1 = sample(1:10),
  question2 = sample(1:10),
  question3 = sample(1:10),
  question1_a = sample(1:10),
  question2_a = sample(1:10),
  question3_a = sample(1:10)
)
# Pair the columns ending in a digit with the columns ending in "_a"
# (by position) and run ICC() on each pair; the result is a list named after
# the first selection's columns.
# The backslash is doubled because "\d" is not a valid escape in an R string
# literal; "\\d" passes the regex `\d` (a digit) to matches().
map2(
  ds %>% select(matches(".*\\d$")),
  ds %>% select(matches(".*_a$")),
  function(x, y) ICC(cbind(x, y))
)
这两个正则表达式匹配以数字结尾的列和以“_a”结尾的列。它产生
$question1
Call: ICC(x = cbind(x, y))
Intraclass correlation coefficients
type ICC F df1 df2 p lower bound upper bound
Single_raters_absolute ICC1 0.52 3.2 9 10 0.044 -0.090 0.85
Single_random_raters ICC2 0.52 3.2 9 9 0.051 -0.094 0.85
Single_fixed_raters ICC3 0.52 3.2 9 9 0.051 -0.122 0.85
Average_raters_absolute ICC1k 0.68 3.2 9 10 0.044 -0.199 0.92
Average_random_raters ICC2k 0.68 3.2 9 9 0.051 -0.206 0.92
Average_fixed_raters ICC3k 0.68 3.2 9 9 0.051 -0.277 0.92
Number of subjects = 10 Number of Judges = 2
$question2
Call: ICC(x = cbind(x, y))
Intraclass correlation coefficients
type ICC F df1 df2 p lower bound upper bound
Single_raters_absolute ICC1 0.37 2.2 9 10 0.12 -0.27 0.79
Single_random_raters ICC2 0.37 2.2 9 9 0.13 -0.28 0.79
Single_fixed_raters ICC3 0.37 2.2 9 9 0.13 -0.30 0.79
Average_raters_absolute ICC1k 0.54 2.2 9 10 0.12 -0.75 0.88
Average_random_raters ICC2k 0.54 2.2 9 9 0.13 -0.77 0.88
Average_fixed_raters ICC3k 0.54 2.2 9 9 0.13 -0.86 0.89
Number of subjects = 10 Number of Judges = 2
$question3
Call: ICC(x = cbind(x, y))
Intraclass correlation coefficients
type ICC F df1 df2 p lower bound upper bound
Single_raters_absolute ICC1 0.45 2.6 9 10 0.074 -0.18 0.82
Single_random_raters ICC2 0.45 2.6 9 9 0.083 -0.18 0.83
Single_fixed_raters ICC3 0.45 2.6 9 9 0.083 -0.21 0.83
Average_raters_absolute ICC1k 0.62 2.6 9 10 0.074 -0.44 0.90
Average_random_raters ICC2k 0.62 2.6 9 9 0.083 -0.45 0.90
Average_fixed_raters ICC3k 0.62 2.6 9 9 0.083 -0.53 0.91
Number of subjects = 10 Number of Judges = 2
假设我有这样的数据集:
# Six integer columns (question1..3 followed by their `_a` counterparts);
# each column is an independent random permutation of 1..10, drawn in
# column order.
ds <- as.data.frame(
  setNames(
    replicate(6L, sample.int(10L), simplify = FALSE),
    c(paste0("question", 1:3), paste0("question", 1:3, "_a"))
  )
)
我想分别计算 question1 与 question1_a、question2 与 question2_a、question3 与 question3_a 之间的组内相关系数(ICC)。[配对模式为:questionN 与 questionN_a]。
我目前的做法是:
# Attach the packages used by this snippet.
library(psych)
library(tidyverse)
# One ICC() call per pair, written out by hand: each call receives a
# two-column matrix built from a question column and its `_a` counterpart.
ICC(cbind(ds$question1, ds$question1_a))
ICC(cbind(ds$question2, ds$question2_a))
我很确定可以用 tidyverse 的函数(purrr / map)一次完成所有 ICC 分析,但我不知道该如何实现。
感谢您的帮助。
完整代码:
library(psych)
library(tidyverse)
# Six integer columns (question1..3 followed by their `_a` counterparts);
# each column is an independent random permutation of 1..10, drawn in
# column order.
ds <- as.data.frame(
  setNames(
    replicate(6L, sample.int(10L), simplify = FALSE),
    c(paste0("question", 1:3), paste0("question", 1:3, "_a"))
  )
)
# Manual, pair-by-pair ICC calls (the repetition the question wants to avoid).
ICC(cbind(ds$question1, ds$question1_a))
ICC(cbind(ds$question2, ds$question2_a))
我们可以这样实现:
set.seed(233)
library(dplyr)
# There might be a better way than selecting twice.
# Columns are paired by position, so this assumes `ds` lists the plain
# question columns and the `_a` columns in the same order.
# The backslash is doubled because "\d" is not a valid escape in an R string
# literal; "\\d" passes the regex `\d` (a digit) to matches().
df1 <- ds %>%
  select(matches(".*\\d$"))
df2 <- ds %>%
  select(contains("_"))
# Returns a list named after the columns of df1, one ICC result per pair.
Map(function(x, y) psych::ICC(cbind(x, y)), df1, df2)
或者仅使用 base R。我注意到,由于重复取子集并多次调用 cbind,这种写法的计算开销可能较大。
# Base-R variant: pick the columns whose names end in a digit and the columns
# whose names contain "_", then pair them by position.
# The backslash is doubled because "\d" is not a valid escape in an R string
# literal; "\\d" passes the regex `\d` (a digit) to grep().
Map(function(x, y) psych::ICC(cbind(x, y)),
    ds[grep(".*\\d$", names(ds))],
    ds[grep("_", names(ds))])
示例输出:
$question1
Call: psych::ICC(x = cbind(x, y))
Intraclass correlation coefficients
type ICC F df1 df2 p lower bound upper bound
Single_raters_absolute ICC1 0.24 1.6 9 10 0.23 -0.40 0.73
Single_random_raters ICC2 0.24 1.6 9 9 0.24 -0.41 0.73
Single_fixed_raters ICC3 0.24 1.6 9 9 0.24 -0.42 0.73
Average_raters_absolute ICC1k 0.38 1.6 9 10 0.23 -1.33 0.84
Average_random_raters ICC2k 0.38 1.6 9 9 0.24 -1.38 0.85
Average_fixed_raters ICC3k 0.38 1.6 9 9 0.24 -1.48 0.85
Number of subjects = 10 Number of Judges = 2
$question2
Call: psych::ICC(x = cbind(x, y))
数据(种子设置为233):
# Fixed copy of the example data: six integer columns over ten rows with
# automatic row names.
ds <- data.frame(
  question1   = c(6L, 8L, 5L, 9L, 3L, 10L, 4L, 2L, 7L, 1L),
  question2   = c(7L, 2L, 3L, 10L, 8L, 9L, 6L, 5L, 1L, 4L),
  question3   = c(3L, 1L, 2L, 9L, 6L, 8L, 7L, 5L, 10L, 4L),
  question1_a = c(4L, 6L, 2L, 8L, 10L, 9L, 5L, 7L, 1L, 3L),
  question2_a = c(9L, 1L, 10L, 5L, 4L, 2L, 8L, 7L, 3L, 6L),
  question3_a = c(9L, 4L, 3L, 6L, 1L, 5L, 10L, 2L, 8L, 7L)
)
下面是一个使用 purrr 且无需创建中间数据框的解决方案:
library(psych)
library(tidyverse)
# Example data: six columns, each an independent random permutation of 1..10.
ds <- data.frame(
  question1 = sample(1:10),
  question2 = sample(1:10),
  question3 = sample(1:10),
  question1_a = sample(1:10),
  question2_a = sample(1:10),
  question3_a = sample(1:10)
)
# Pair the columns ending in a digit with the columns ending in "_a"
# (by position) and run ICC() on each pair; the result is a list named after
# the first selection's columns.
# The backslash is doubled because "\d" is not a valid escape in an R string
# literal; "\\d" passes the regex `\d` (a digit) to matches().
map2(
  ds %>% select(matches(".*\\d$")),
  ds %>% select(matches(".*_a$")),
  function(x, y) ICC(cbind(x, y))
)
这两个正则表达式匹配以数字结尾的列和以“_a”结尾的列。它产生
$question1
Call: ICC(x = cbind(x, y))
Intraclass correlation coefficients
type ICC F df1 df2 p lower bound upper bound
Single_raters_absolute ICC1 0.52 3.2 9 10 0.044 -0.090 0.85
Single_random_raters ICC2 0.52 3.2 9 9 0.051 -0.094 0.85
Single_fixed_raters ICC3 0.52 3.2 9 9 0.051 -0.122 0.85
Average_raters_absolute ICC1k 0.68 3.2 9 10 0.044 -0.199 0.92
Average_random_raters ICC2k 0.68 3.2 9 9 0.051 -0.206 0.92
Average_fixed_raters ICC3k 0.68 3.2 9 9 0.051 -0.277 0.92
Number of subjects = 10 Number of Judges = 2
$question2
Call: ICC(x = cbind(x, y))
Intraclass correlation coefficients
type ICC F df1 df2 p lower bound upper bound
Single_raters_absolute ICC1 0.37 2.2 9 10 0.12 -0.27 0.79
Single_random_raters ICC2 0.37 2.2 9 9 0.13 -0.28 0.79
Single_fixed_raters ICC3 0.37 2.2 9 9 0.13 -0.30 0.79
Average_raters_absolute ICC1k 0.54 2.2 9 10 0.12 -0.75 0.88
Average_random_raters ICC2k 0.54 2.2 9 9 0.13 -0.77 0.88
Average_fixed_raters ICC3k 0.54 2.2 9 9 0.13 -0.86 0.89
Number of subjects = 10 Number of Judges = 2
$question3
Call: ICC(x = cbind(x, y))
Intraclass correlation coefficients
type ICC F df1 df2 p lower bound upper bound
Single_raters_absolute ICC1 0.45 2.6 9 10 0.074 -0.18 0.82
Single_random_raters ICC2 0.45 2.6 9 9 0.083 -0.18 0.83
Single_fixed_raters ICC3 0.45 2.6 9 9 0.083 -0.21 0.83
Average_raters_absolute ICC1k 0.62 2.6 9 10 0.074 -0.44 0.90
Average_random_raters ICC2k 0.62 2.6 9 9 0.083 -0.45 0.90
Average_fixed_raters ICC3k 0.62 2.6 9 9 0.083 -0.53 0.91
Number of subjects = 10 Number of Judges = 2