在向量化函数中调用向量化函数
Calling vectorized functions within vectorized functions
我正在为跨电池管理的一系列认知测试编写评分代码。在下面的示例中,我有一个名为 SHAPES_v1 的虚拟测试,但在我的应用程序中,有许多不同版本的不同测试。我试图通过使用 sapply() 和 Vectorize() 来矢量化我的函数,但输出 (scored_battery_1
) 与我试图实现的目标 (desired_output
) 不匹配。当我 运行 在示例项目的每个函数中单独调用时,一切正常,所以我很确定我的矢量化失败了。我已经实施了 Vectorize(),并且 sapply() 被注释掉了。 Vectorize() 方法包含正确的输出,但仍有初始变量,并且是嵌套列表而不是数据框。知道我做错了什么吗?
library('dplyr')
battery_1 <- data.frame(PID=paste0('PID', 1:5), SHAPES_v1_QID1_RESP=c(rep(4, 3),
rep(2, 2)), SHAPES_v1_QID2_RESP=c(rep(2, 3), rep(3, 2)),
LETTERS_v1_QID1_RESP=c(rep(5, 3), rep(2, 2)),
LETTERS_v1_QID2_RESP=c(rep(5, 1), rep(6, 4)))
SHAPES_v1 <- data.frame(QID=1:2, CorrectResponse=c(4, 3))
LETTERS_v1 <- data.frame(QID=1:2, CorrectResponse=c(5, 6))
########### Simplify names
simpNames <- function(i, varnames) {
return(paste(varnames[[i]][1], varnames[[i]][2], sep='_'))
}
simpNames <- Vectorize(simpNames, vectorize.args='i', SIMPLIFY=TRUE)
########### Score a specific item
scoreItem <- function(battery, answers, item, num) {
corrItem <- gsub('RESP', 'CORR', item)
ans <- answers[answers$QID == num, 'CorrectResponse']
battery <- battery %>% mutate_at( .funs = funs(ifelse(. == ans,
yes = 1, no = 0)), .vars = item)
names(battery)[names(battery) == item] <- corrItem
return(battery)
}
scoreItem <- Vectorize(scoreItem, vectorize.args=c('item', 'num'), SIMPLIFY=FALSE)
########### Score a specific test
scoreTest <- function(battery, test) {
if (exists(test) & length(grep('DISC', test)) == 0) {
answers <- get(test)
# List items
items <- paste0(test, '_', 'QID', answers$QID, '_RESP')
nums <- answers$QID
# Score items
battery <- scoreItem(battery, answers, items, nums)
#battery <- sapply(1:length(nums), function(i) scoreItem(battery, answers, items[i], nums[i]))
} else {
print(paste('Answer key does not exist for', test))
}
return(battery)
}
scoreTest <- Vectorize(scoreTest, vectorize.args=c('test'), SIMPLIFY=FALSE)
########### Score the whole battery
score <- function(battery) {
varnames <- names(battery)[!(names(battery) %in% grep('PID', names(battery), value=TRUE))]
varnames <- strsplit(varnames, '_')
varnames <- simpNames(1:length(varnames), varnames)
tests <- unique(varnames)
# Score a specific test
battery <- scoreTest(battery, tests)
#battery <- sapply(1:length(tests), function(i) scoreTest(battery, tests[i]))
return(battery)
}
#################### Score the batteries ####################
scored_battery_1 <- score(battery_1)
scored_battery_1
####################### Desired Output ######################
desired_output <- data.frame(PID=paste0('PID', 1:5), SHAPES_v1_QID1_CORR=c(rep(1, 3),
rep(0, 2)), SHAPES_v1_QID2_CORR=c(rep(0, 3), rep(1, 2)),
LETTERS_v1_QID1_CORR=c(rep(1, 3), rep(0, 2)),
LETTERS_v1_QID2_CORR=c(rep(0, 1), rep(1, 4)))
desired_output
不知怎的,我觉得你把一些事情复杂化了。
我已经尝试完成您所描述的相同输出。让我知道以下是否适合您:
library(dplyr)
library(tidyr)
library(purrr)
score <- function(battery) {
battery %>%
pivot_longer(-PID, names_to = 'response_id', values_to = 'response_value') %>%
mutate(
test_name = str_extract(response_id, '^[^_]+_[^_]+(?=_)'),
QID = as.integer(str_extract(response_id, '(?<=QID)\d+(?=_)'))
) %>%
filter(test_name %in% ls(envir = .GlobalEnv)) %>%
split(f = .$test_name) %>%
imap(.f = function(test_results, test_name){
test_results %>%
left_join(get(test_name), by = 'QID') %>%
filter(!is.na(CorrectResponse)) %>%
mutate(
is_correct = as.integer(response_value == CorrectResponse)
)
}) %>%
do.call(bind_rows, .) %>%
select(PID, response_id, is_correct) %>%
spread(key = response_id, value = is_correct)
}
这实际上是在执行以下操作:
- 使用
pivot_longer
将响应列转换为按行表示,将 PID
列留在原位
- 提取
test_name
和 QID
,我认为你需要它们来评分
- 仅过滤我们已加载响应的测试
- 将数据框拆分成一个列表,这样我们就可以...
- ...将正确的响应 df 左连接到每个块上,然后对测试进行评分
- 将数据帧重新加入一次
- select只有
PID
列,原来的列名和我们的分数
- 将它们再次展开成列格式
多田:)
我正在为跨电池管理的一系列认知测试编写评分代码。在下面的示例中,我有一个名为 SHAPES_v1 的虚拟测试,但在我的应用程序中,有许多不同版本的不同测试。我试图通过使用 sapply() 和 Vectorize() 来矢量化我的函数,但输出 (scored_battery_1
) 与我试图实现的目标 (desired_output
) 不匹配。当我 运行 在示例项目的每个函数中单独调用时,一切正常,所以我很确定我的矢量化失败了。我已经实施了 Vectorize(),并且 sapply() 被注释掉了。 Vectorize() 方法包含正确的输出,但仍有初始变量,并且是嵌套列表而不是数据框。知道我做错了什么吗?
library('dplyr')
battery_1 <- data.frame(PID=paste0('PID', 1:5), SHAPES_v1_QID1_RESP=c(rep(4, 3),
rep(2, 2)), SHAPES_v1_QID2_RESP=c(rep(2, 3), rep(3, 2)),
LETTERS_v1_QID1_RESP=c(rep(5, 3), rep(2, 2)),
LETTERS_v1_QID2_RESP=c(rep(5, 1), rep(6, 4)))
SHAPES_v1 <- data.frame(QID=1:2, CorrectResponse=c(4, 3))
LETTERS_v1 <- data.frame(QID=1:2, CorrectResponse=c(5, 6))
########### Simplify names
simpNames <- function(i, varnames) {
return(paste(varnames[[i]][1], varnames[[i]][2], sep='_'))
}
simpNames <- Vectorize(simpNames, vectorize.args='i', SIMPLIFY=TRUE)
########### Score a specific item
scoreItem <- function(battery, answers, item, num) {
corrItem <- gsub('RESP', 'CORR', item)
ans <- answers[answers$QID == num, 'CorrectResponse']
battery <- battery %>% mutate_at( .funs = funs(ifelse(. == ans,
yes = 1, no = 0)), .vars = item)
names(battery)[names(battery) == item] <- corrItem
return(battery)
}
scoreItem <- Vectorize(scoreItem, vectorize.args=c('item', 'num'), SIMPLIFY=FALSE)
########### Score a specific test
scoreTest <- function(battery, test) {
if (exists(test) & length(grep('DISC', test)) == 0) {
answers <- get(test)
# List items
items <- paste0(test, '_', 'QID', answers$QID, '_RESP')
nums <- answers$QID
# Score items
battery <- scoreItem(battery, answers, items, nums)
#battery <- sapply(1:length(nums), function(i) scoreItem(battery, answers, items[i], nums[i]))
} else {
print(paste('Answer key does not exist for', test))
}
return(battery)
}
scoreTest <- Vectorize(scoreTest, vectorize.args=c('test'), SIMPLIFY=FALSE)
########### Score the whole battery
score <- function(battery) {
varnames <- names(battery)[!(names(battery) %in% grep('PID', names(battery), value=TRUE))]
varnames <- strsplit(varnames, '_')
varnames <- simpNames(1:length(varnames), varnames)
tests <- unique(varnames)
# Score a specific test
battery <- scoreTest(battery, tests)
#battery <- sapply(1:length(tests), function(i) scoreTest(battery, tests[i]))
return(battery)
}
#################### Score the batteries ####################
scored_battery_1 <- score(battery_1)
scored_battery_1
####################### Desired Output ######################
desired_output <- data.frame(PID=paste0('PID', 1:5), SHAPES_v1_QID1_CORR=c(rep(1, 3),
rep(0, 2)), SHAPES_v1_QID2_CORR=c(rep(0, 3), rep(1, 2)),
LETTERS_v1_QID1_CORR=c(rep(1, 3), rep(0, 2)),
LETTERS_v1_QID2_CORR=c(rep(0, 1), rep(1, 4)))
desired_output
不知怎的,我觉得你把一些事情复杂化了。
我已经尝试完成您所描述的相同输出。让我知道以下是否适合您:
library(dplyr)
library(tidyr)
library(purrr)
score <- function(battery) {
battery %>%
pivot_longer(-PID, names_to = 'response_id', values_to = 'response_value') %>%
mutate(
test_name = str_extract(response_id, '^[^_]+_[^_]+(?=_)'),
QID = as.integer(str_extract(response_id, '(?<=QID)\d+(?=_)'))
) %>%
filter(test_name %in% ls(envir = .GlobalEnv)) %>%
split(f = .$test_name) %>%
imap(.f = function(test_results, test_name){
test_results %>%
left_join(get(test_name), by = 'QID') %>%
filter(!is.na(CorrectResponse)) %>%
mutate(
is_correct = as.integer(response_value == CorrectResponse)
)
}) %>%
do.call(bind_rows, .) %>%
select(PID, response_id, is_correct) %>%
spread(key = response_id, value = is_correct)
}
这实际上是在执行以下操作:
- 使用
pivot_longer
将响应列转换为按行表示,将PID
列留在原位 - 提取
test_name
和QID
,我认为你需要它们来评分 - 仅过滤我们已加载响应的测试
- 将数据框拆分成一个列表,这样我们就可以...
- ...将正确的响应 df 左连接到每个块上,然后对测试进行评分
- 将数据帧重新加入一次
- select只有
PID
列,原来的列名和我们的分数 - 将它们再次展开成列格式
多田:)