在数据集的选定元素上映射函数
Mapping a function on selected elements of a data set
我一直在尝试将函数 bctau(以及其中包含的 theil)映射到数据集的选定元素上。此函数使用 two-step 过程来计算 AB single-case 设计的效果大小。它需要两个参数(a = 基线阶段的所有值;b = 干预阶段的所有值)。下面,您可以找到 Tarlow (2017) 开发的两个功能。
library(Kendall)
library(dplyr)
library(purrr)
library(tidyr)
bctau <- function(a,b) {
# The bctau() function accepts two arguments, a and b, which
# are vectors for each phase in an AB single-case design
n <- length(a) + length(b)
ta <- 1:(length(a))
tb <- (length(a) + 1):(length(a) + length(b))
# if baseline trend is not statistically significant,
# return tau result (no trend correction)
if (Kendall(a,ta)$sl > .05) {
results <- Kendall(c(a,b), c(rep(0,length(a)), rep(1,length(b))))
tau <- as.numeric(results$tau)
p <- as.numeric(results$sl)
se <- sqrt((2/n) * (1 - (tau^2)))
return(list(tau = tau, p = p, se = se, corrected = FALSE))
}
# if baseline trend is statistically significant,
# get Theil-Sen residuals
theilsen <- theil(ta, a)
slope <- theilsen$slope
intercept <- theilsen$int
correcteda <- as.numeric()
correctedb <- as.numeric()
for (i in 1:length(a)) {
correcteda[i] <- a[i] - (slope*i + intercept)
}
for (i in 1:length(b)) {
correctedb[i] <- b[i] - (slope*(i + length(a)) + intercept)
}
results <- Kendall(c(correcteda,correctedb),c(rep(0,length(a)),rep(1,length(b))))
tau <- as.numeric(results$tau)
p <- as.numeric(results$sl)
se <- sqrt((2/n) * (1 - (tau^2)))
return(list(tau = tau, p = p, se = se, corrected = TRUE, int = intercept, slope = slope, correcteda = correcteda, correctedb = correctedb))
}
theil <- function(x,y) {
# returns theil-sen slope and intercept estimates;
# x and y are two equal length vectors (x & y coords)
n <- length(x)
slopes <- as.numeric()
ints <- as.numeric()
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
slopes <- c(slopes, ((y[j] - y[i]) / (x[j] - x[i])))
}
}
b <- median(slopes)
for (i in 1:n) {
ints <- c(ints, (y[i] - (b*x[i])))
}
results <- list(slope = b, int = median(ints))
return(results)
}
我的数据集由五列组成:
1. Scalex:为参与者评分的行为量表;
2. IDx:参与者ID(注意每个参与者完成了两个量表);
3. Timex:session的个数(Phase每次变化时re-starts);
4.阶段:基线(A)或干预阶段(B);
5. Ratex:评级量表分数(从 1 到 20)。
Scalex <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
IDx <- c("C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2")
Timex <- c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 5)
Phasex <- c("A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "B")
Ratex <- c(4, 8, 10, 12, 15, 7, 7, 9, 14, 15, 16, 4, 3, 2, 2, 7, 7, 9, 14, 2, 3, 6, 6, 7, 5, 9, 11, 5, 6, 3, 4, 8, 7, 9, 3, 3)
db <- data.frame(Scalex, IDx, Timex, Phasex, Ratex)
我想做的是按比例对我的数据进行分组,然后将函数 bctau() 应用于每个参与者。我尝试将参与者嵌套到量表中,这就是结果。
d <- db %>%
group_by(Scalex) %>%
nest()
# A tibble: 2 x 2
# Scalex data
# <dbl> <list>
#1 1 <tibble [19 x 4]>
#2 2 <tibble [17 x 4]>
这是其中一个嵌套小标题的样子。 Phasex 表示评级是在基线阶段 (A) 还是干预阶段 (B) 进行的,而 Ratex 表示学生的行为评级分数。
d$data[[1]]
#[[1]]
# # A tibble: 19 x 4
# IDx Timex Phasex Ratex
# <fct> <dbl> <fct> <dbl>
# 1 C1 1 A 4
# 2 C1 2 A 8
# 3 C1 3 A 10
# 4 C1 4 A 12
# 5 C1 5 A 15
# 6 C1 1 B 7
# 7 C1 2 B 7
# 8 C1 3 B 9
# 9 C1 4 B 14
#10 C1 5 B 15
#11 C1 6 B 16
#12 C2 1 A 4
#13 C2 2 A 3
#14 C2 3 A 2
#15 C2 4 A 2
#16 C2 1 B 7
#17 C2 2 B 7
#18 C2 3 B 9
#19 C2 4 B 14
我试着写了这段代码。我使用了 map2_df 因为我使用了两个变量。我的代码每次采用两列,并将第一列用作基线,将第二列用作干预。然后计算 bctau 值和与其相关的其他统计参数(标准误差,p-values,等)
f <- db %>%
filter(Scalex == 1) %>%
unite(ID2x, IDx, Phasex) %>%
spread(ID2x, Ratex) %>%
dplyr::select(-Scalex, -Timex) %>%
data.frame()
#> f
# C1_A C1_B C2_A C2_B
#1 4 7 4 7
#2 8 7 3 7
#3 10 9 2 9
#4 12 14 2 14
#5 15 15 NA NA
#6 NA 16 NA NA
f1 <- f %>% select(C2_A, C2_B)
> g <- map2_df(.x = f1[seq(1, ncol(f1), 2)], .y = f1[seq(2, ncol(f1), 2)], ~ bctau(.x, .y))
> g
# A tibble: 1 x 4
# tau p se corrected
# <dbl> <dbl> <dbl> <lgl>
#1 0.784 0.0284 0.253 FALSE
代码似乎只有在函数 bctau 不需要调用函数 theil 时才有效(这意味着基线趋势不需要使用非参数 Theil-Sen 估计器跨 A 和 B 阶段进行校正).当调用 theil 函数时,也会调用包 Kendall 并且 NA 值似乎会产生一些问题。但是,我无法摆脱它们,因为基线和干预阶段的长度并不总是相同。
g <- map2_df(.x = f[seq(1, ncol(f), 2)], .y = f[seq(2, ncol(f), 2)], ~ bctau(.x, .y))
#WARNING: Error exit, tauk2. IFAULT = 10
#Error in bind_rows_(x, .id) : Argument 7 must be length 1, not 6
我不一定要用 purrr,虽然它会很好。
更新
我能够解决部分问题。如果我删除 bctau 函数中 ** 之间的代码部分(我并不真正需要),第二个错误行将不再显示。
return(list(tau = tau, p = p, se = se, corrected = TRUE))
code removed from the bctau function: **int = intercept, slope = slope, correcteda = correcteda, correctedb = correctedb**
不幸的是,#WARNING: Error exit, tauk2. IFAULT = 10
仍然存在,它不允许在结果中报告更正后的估计。
g <- map2_df(.x = f[seq(1, ncol(f), 2)], .y = f[seq(2, ncol(f), 2)], ~ bctau(.x, .y))
#WARNING: Error exit, tauk2. IFAULT = 10
g
# A tibble: 2 x 4
# tau p se corrected
# <dbl> <dbl> <dbl> <lgl>
#1 1 1 0 TRUE
#2 0.784 0.0284 0.253 FALSE
更新 2
当我从列中手动删除 NA 值时,解决方案出现了。所以我的猜测是,当需要基线校正时,如果涉及 NA 值,theil
函数将无法计算新的估计值。有没有办法告诉函数不考虑 NA 值?
f2$C1_A
#[1] 4 8 10 12 15 NA
f2$C1_B
#[1] 7 7 9 14 15 16
#bl <- c(4, 8, 10, 12, 15, NA)
#i <- c(7, 7, 9, 14, 15, 16)
#bctau(bl, i)
#WARNING: Error exit, tauk2. IFAULT = 10
bl <- c(4, 8, 10, 12, 15) #remove NA manually
bl
#[1] 4 8 10 12 15
i
#[1] 7 7 9 14 15 16
bctau(bl, i) #calculate bctau
#$`tau`
#[1] -0.7385489
#$p
#[1] 0.008113123
#$se
#[1] 0.2874798
#$corrected
#[1] TRUE
我认为您需要通过 Scalex 和 IDx 嵌套 data.frame,然后在嵌套的 data.frame 上使用匿名函数。我认为这段代码可以满足您的需求:
db %>%
spread(Phasex, Ratex) %>%
group_by(Scalex, IDx) %>%
nest() %>%
mutate(m = map(data, function(d) bctau(a = d$A, b = d$B))) %>%
unnest(m)
我一直在尝试将函数 bctau(以及其中包含的 theil)映射到数据集的选定元素上。此函数使用 two-step 过程来计算 AB single-case 设计的效果大小。它需要两个参数(a = 基线阶段的所有值;b = 干预阶段的所有值)。下面,您可以找到 Tarlow (2017) 开发的两个功能。
library(Kendall)
library(dplyr)
library(purrr)
library(tidyr)
bctau <- function(a,b) {
# The bctau() function accepts two arguments, a and b, which
# are vectors for each phase in an AB single-case design
n <- length(a) + length(b)
ta <- 1:(length(a))
tb <- (length(a) + 1):(length(a) + length(b))
# if baseline trend is not statistically significant,
# return tau result (no trend correction)
if (Kendall(a,ta)$sl > .05) {
results <- Kendall(c(a,b), c(rep(0,length(a)), rep(1,length(b))))
tau <- as.numeric(results$tau)
p <- as.numeric(results$sl)
se <- sqrt((2/n) * (1 - (tau^2)))
return(list(tau = tau, p = p, se = se, corrected = FALSE))
}
# if baseline trend is statistically significant,
# get Theil-Sen residuals
theilsen <- theil(ta, a)
slope <- theilsen$slope
intercept <- theilsen$int
correcteda <- as.numeric()
correctedb <- as.numeric()
for (i in 1:length(a)) {
correcteda[i] <- a[i] - (slope*i + intercept)
}
for (i in 1:length(b)) {
correctedb[i] <- b[i] - (slope*(i + length(a)) + intercept)
}
results <- Kendall(c(correcteda,correctedb),c(rep(0,length(a)),rep(1,length(b))))
tau <- as.numeric(results$tau)
p <- as.numeric(results$sl)
se <- sqrt((2/n) * (1 - (tau^2)))
return(list(tau = tau, p = p, se = se, corrected = TRUE, int = intercept, slope = slope, correcteda = correcteda, correctedb = correctedb))
}
theil <- function(x,y) {
# returns theil-sen slope and intercept estimates;
# x and y are two equal length vectors (x & y coords)
n <- length(x)
slopes <- as.numeric()
ints <- as.numeric()
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
slopes <- c(slopes, ((y[j] - y[i]) / (x[j] - x[i])))
}
}
b <- median(slopes)
for (i in 1:n) {
ints <- c(ints, (y[i] - (b*x[i])))
}
results <- list(slope = b, int = median(ints))
return(results)
}
我的数据集由五列组成: 1. Scalex:为参与者评分的行为量表; 2. IDx:参与者ID(注意每个参与者完成了两个量表); 3. Timex:session的个数(Phase每次变化时re-starts); 4.阶段:基线(A)或干预阶段(B); 5. Ratex:评级量表分数(从 1 到 20)。
Scalex <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
IDx <- c("C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2")
Timex <- c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 5)
Phasex <- c("A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "B")
Ratex <- c(4, 8, 10, 12, 15, 7, 7, 9, 14, 15, 16, 4, 3, 2, 2, 7, 7, 9, 14, 2, 3, 6, 6, 7, 5, 9, 11, 5, 6, 3, 4, 8, 7, 9, 3, 3)
db <- data.frame(Scalex, IDx, Timex, Phasex, Ratex)
我想做的是按比例对我的数据进行分组,然后将函数 bctau() 应用于每个参与者。我尝试将参与者嵌套到量表中,这就是结果。
d <- db %>%
group_by(Scalex) %>%
nest()
# A tibble: 2 x 2
# Scalex data
# <dbl> <list>
#1 1 <tibble [19 x 4]>
#2 2 <tibble [17 x 4]>
这是其中一个嵌套小标题的样子。 Phasex 表示评级是在基线阶段 (A) 还是干预阶段 (B) 进行的,而 Ratex 表示学生的行为评级分数。
d$data[[1]]
#[[1]]
# # A tibble: 19 x 4
# IDx Timex Phasex Ratex
# <fct> <dbl> <fct> <dbl>
# 1 C1 1 A 4
# 2 C1 2 A 8
# 3 C1 3 A 10
# 4 C1 4 A 12
# 5 C1 5 A 15
# 6 C1 1 B 7
# 7 C1 2 B 7
# 8 C1 3 B 9
# 9 C1 4 B 14
#10 C1 5 B 15
#11 C1 6 B 16
#12 C2 1 A 4
#13 C2 2 A 3
#14 C2 3 A 2
#15 C2 4 A 2
#16 C2 1 B 7
#17 C2 2 B 7
#18 C2 3 B 9
#19 C2 4 B 14
我试着写了这段代码。我使用了 map2_df 因为我使用了两个变量。我的代码每次采用两列,并将第一列用作基线,将第二列用作干预。然后计算 bctau 值和与其相关的其他统计参数(标准误差,p-values,等)
f <- db %>%
filter(Scalex == 1) %>%
unite(ID2x, IDx, Phasex) %>%
spread(ID2x, Ratex) %>%
dplyr::select(-Scalex, -Timex) %>%
data.frame()
#> f
# C1_A C1_B C2_A C2_B
#1 4 7 4 7
#2 8 7 3 7
#3 10 9 2 9
#4 12 14 2 14
#5 15 15 NA NA
#6 NA 16 NA NA
f1 <- f %>% select(C2_A, C2_B)
> g <- map2_df(.x = f1[seq(1, ncol(f1), 2)], .y = f1[seq(2, ncol(f1), 2)], ~ bctau(.x, .y))
> g
# A tibble: 1 x 4
# tau p se corrected
# <dbl> <dbl> <dbl> <lgl>
#1 0.784 0.0284 0.253 FALSE
代码似乎只有在函数 bctau 不需要调用函数 theil 时才有效(这意味着基线趋势不需要使用非参数 Theil-Sen 估计器跨 A 和 B 阶段进行校正).当调用 theil 函数时,也会调用包 Kendall 并且 NA 值似乎会产生一些问题。但是,我无法摆脱它们,因为基线和干预阶段的长度并不总是相同。
g <- map2_df(.x = f[seq(1, ncol(f), 2)], .y = f[seq(2, ncol(f), 2)], ~ bctau(.x, .y))
#WARNING: Error exit, tauk2. IFAULT = 10
#Error in bind_rows_(x, .id) : Argument 7 must be length 1, not 6
我不一定要用 purrr,虽然它会很好。
更新
我能够解决部分问题。如果我删除 bctau 函数中 ** 之间的代码部分(我并不真正需要),第二个错误行将不再显示。
return(list(tau = tau, p = p, se = se, corrected = TRUE))
code removed from the bctau function: **int = intercept, slope = slope, correcteda = correcteda, correctedb = correctedb**
不幸的是,#WARNING: Error exit, tauk2. IFAULT = 10
仍然存在,它不允许在结果中报告更正后的估计。
g <- map2_df(.x = f[seq(1, ncol(f), 2)], .y = f[seq(2, ncol(f), 2)], ~ bctau(.x, .y))
#WARNING: Error exit, tauk2. IFAULT = 10
g
# A tibble: 2 x 4
# tau p se corrected
# <dbl> <dbl> <dbl> <lgl>
#1 1 1 0 TRUE
#2 0.784 0.0284 0.253 FALSE
更新 2
当我从列中手动删除 NA 值时,解决方案出现了。所以我的猜测是,当需要基线校正时,如果涉及 NA 值,theil
函数将无法计算新的估计值。有没有办法告诉函数不考虑 NA 值?
f2$C1_A
#[1] 4 8 10 12 15 NA
f2$C1_B
#[1] 7 7 9 14 15 16
#bl <- c(4, 8, 10, 12, 15, NA)
#i <- c(7, 7, 9, 14, 15, 16)
#bctau(bl, i)
#WARNING: Error exit, tauk2. IFAULT = 10
bl <- c(4, 8, 10, 12, 15) #remove NA manually
bl
#[1] 4 8 10 12 15
i
#[1] 7 7 9 14 15 16
bctau(bl, i) #calculate bctau
#$`tau`
#[1] -0.7385489
#$p
#[1] 0.008113123
#$se
#[1] 0.2874798
#$corrected
#[1] TRUE
我认为您需要通过 Scalex 和 IDx 嵌套 data.frame,然后在嵌套的 data.frame 上使用匿名函数。我认为这段代码可以满足您的需求:
db %>%
spread(Phasex, Ratex) %>%
group_by(Scalex, IDx) %>%
nest() %>%
mutate(m = map(data, function(d) bctau(a = d$A, b = d$B))) %>%
unnest(m)