函数在映射时返回所有值而不是一个值
Function is returning all values instead of one when mapped
希望得到一些关于如何映射一个函数的建议,该函数返回一个句子的可读性分数向量(最终要把这些结果全部绑定在一起)。我尝试了两种不同的方法,但到目前为止只弄清楚了如何通过 for 循环得到想要的结果。
library(quanteda.textstats)
# Sample data: one row per line of text, keyed by a numeric id.
haiku_df <- data.frame(
  id = c(1, 2, 3),
  sentences = c(
    "Mapping a function",
    "Can sometimes leave me with more",
    "Questions than answers"
  )
)
我原以为这会为每个列表元素返回一个分数向量,但结果却重复了 nrow(haiku_df) 次:
# Compute readability scores for `text` and return them next to `id`.
#
# text: character input handed to textstat_readability().
# id:   identifier value(s) stored in the `id` column of the result.
#
# Returns a data.frame with the columns id, flesch_score and fog_score.
scores <- function(text, id) {
  # Ask for both measures in a single call so the text is processed once
  # instead of once per measure.
  readability <- textstat_readability(text, measure = c("Flesch", "FOG"))
  data.frame(
    id = id,
    flesch_score = readability[["Flesch"]],
    fog_score = readability[["FOG"]]
  )
}
# Start from an empty list; it is replaced by the lapply() result just below.
score_df <- list()
# lapply() iterates over the sentences only. haiku_df$id is forwarded whole as
# the `id` argument of every call, so each result has one row per id with the
# single sentence's scores repeated.
score_df <- lapply(haiku_df$sentences, scores, haiku_df$id)
score_df
> score_df
[[1]]
id flesch_score fog_score
1 1 62.79 1.2
2 2 62.79 1.2
3 3 62.79 1.2
[[2]]
id flesch_score fog_score
1 1 102.045 2.4
2 2 102.045 2.4
3 3 102.045 2.4
[[3]]
id flesch_score fog_score
1 1 62.79 1.2
2 2 62.79 1.2
3 3 62.79 1.2
这个方向更接近了,但结果仍然不正确(添加 n 作为参数):
# Variant of scores() that takes an extra index `n` and scores text[n],
# pairing the result with id[n].
#
# text: character input; only the elements selected by `n` are scored.
# id:   identifier vector; only the elements selected by `n` are kept.
# n:    index (or indices) applied to both `text` and `id`.
#
# Returns a data.frame of id[n] plus the Flesch and FOG scores.
scores2 <- function(text,id,n){
# When `text` has length one and n is 1:3, text[2] and text[3] are NA.
flesch_score <- textstat_readability(text[n], measure = "Flesch")$Flesch
fog_score <- textstat_readability(text[n], measure = "FOG")$FOG
# id[n] is passed unnamed, so data.frame() derives that column's name from
# the expression (it prints as `id.n.`).
row <- data.frame(id[n], flesch_score, fog_score)
row
}
# Start from an empty list; it is overwritten by the lapply() result below.
score2_df <- list()
# Each call receives one sentence as `text`, the full id vector as `id`, and
# n = 1:nrow(haiku_df), so text[n] indexes past the single sentence.
score2_df <- lapply(haiku_df$sentences, scores2, haiku_df$id, n = 1:nrow(haiku_df))
> score2_df
[[1]]
id.n. flesch_score fleschkincaid_score
1 1 62.79 5.246667
2 2 NA NA
3 3 NA NA
[[2]]
id.n. flesch_score fleschkincaid_score
1 1 102.045 0.5166667
2 2 NA NA
3 3 NA NA
[[3]]
id.n. flesch_score fleschkincaid_score
1 1 62.79 5.246667
2 2 NA NA
3 3 NA NA
可靠的 for 循环让我得到了想要的结果,但在数据规模变大时显然会比较慢。
# Collect one result per row of haiku_df, pairing each sentence with its id.
# The list is preallocated and seq_len() is used so that a data frame with
# zero rows yields an empty list instead of looping over c(1, 0).
score3_df <- vector("list", nrow(haiku_df))
for (i in seq_len(nrow(haiku_df))) {
  score3_df[[i]] <- scores(haiku_df$sentences[i], haiku_df$id[i])
}
> dplyr::bind_rows(score3_df)
id flesch_score fog_score
1 1 62.790 1.2
2 2 102.045 2.4
3 3 62.790 1.2
感觉我忽略了一些非常简单的事情,但似乎无法弄清楚。
谢谢!
这里需要用 Map 而不是 lapply,因为函数的输入是 'sentences' 的每个元素及其对应的 'id'。
# Map() calls scores() on each sentence together with its matching id;
# do.call(rbind, ...) then stacks the per-sentence data frames into one.
do.call(rbind, Map(scores, haiku_df$sentences, haiku_df$id))
#. id flesch_score fog_score
#Mapping a function 1 62.790 1.2
#Can sometimes leave me with more 2 102.045 2.4
#Questions than answers 3 62.790 1.2
也可以写成
# Alternative spelling of the Map() call: haiku_df[2:1] puts the columns in
# the order (sentences, id), and unname() drops the column names so they are
# matched to the arguments of scores() by position.
do.call(rbind, do.call(Map, c(f = scores, unname(haiku_df[2:1]))))
或使用tidyverse
library(dplyr)
library(purrr)
library(tidyr)
# Apply scores() to each (sentence, id) pair, then expand the resulting
# list-column of data frames into regular columns.
haiku_df %>%
  transmute(out = map2(sentences, id, scores)) %>%
  unnest(cols = out)
-输出
# A tibble: 3 x 3
# id flesch_score fog_score
# <dbl> <dbl> <dbl>
#1 1 62.8 1.2
#2 2 102. 2.4
#3 3 62.8 1.2
或使用rowwise
# Evaluate scores() one row at a time; the unnamed data-frame result is
# spread into its own columns, and ungroup() drops the row-wise grouping.
haiku_df %>%
  rowwise() %>%
  transmute(scores(sentences, id)) %>%
  ungroup()
# A tibble: 3 x 3
# id flesch_score fog_score
# <dbl> <dbl> <dbl>
#1 1 62.8 1.2
#2 2 102. 2.4
#3 3 62.8 1.2
希望得到一些关于如何映射一个函数的建议,该函数返回一个句子的可读性分数向量(最终要把这些结果全部绑定在一起)。我尝试了两种不同的方法,但到目前为止只弄清楚了如何通过 for 循环得到想要的结果。
library(quanteda.textstats)
# Sample data: one row per line of text, keyed by a numeric id.
haiku_df <- data.frame(
  id = c(1, 2, 3),
  sentences = c(
    "Mapping a function",
    "Can sometimes leave me with more",
    "Questions than answers"
  )
)
我原以为这会为每个列表元素返回一个分数向量,但结果却重复了 nrow(haiku_df) 次:
# Compute readability scores for `text` and return them next to `id`.
#
# text: character input handed to textstat_readability().
# id:   identifier value(s) stored in the `id` column of the result.
#
# Returns a data.frame with the columns id, flesch_score and fog_score.
scores <- function(text, id) {
  # Ask for both measures in a single call so the text is processed once
  # instead of once per measure.
  readability <- textstat_readability(text, measure = c("Flesch", "FOG"))
  data.frame(
    id = id,
    flesch_score = readability[["Flesch"]],
    fog_score = readability[["FOG"]]
  )
}
# Start from an empty list; it is replaced by the lapply() result just below.
score_df <- list()
# lapply() iterates over the sentences only. haiku_df$id is forwarded whole as
# the `id` argument of every call, so each result has one row per id with the
# single sentence's scores repeated.
score_df <- lapply(haiku_df$sentences, scores, haiku_df$id)
score_df
> score_df
[[1]]
id flesch_score fog_score
1 1 62.79 1.2
2 2 62.79 1.2
3 3 62.79 1.2
[[2]]
id flesch_score fog_score
1 1 102.045 2.4
2 2 102.045 2.4
3 3 102.045 2.4
[[3]]
id flesch_score fog_score
1 1 62.79 1.2
2 2 62.79 1.2
3 3 62.79 1.2
这个方向更接近了,但结果仍然不正确(添加 n 作为参数):
# Variant of scores() that takes an extra index `n` and scores text[n],
# pairing the result with id[n].
#
# text: character input; only the elements selected by `n` are scored.
# id:   identifier vector; only the elements selected by `n` are kept.
# n:    index (or indices) applied to both `text` and `id`.
#
# Returns a data.frame of id[n] plus the Flesch and FOG scores.
scores2 <- function(text,id,n){
# When `text` has length one and n is 1:3, text[2] and text[3] are NA.
flesch_score <- textstat_readability(text[n], measure = "Flesch")$Flesch
fog_score <- textstat_readability(text[n], measure = "FOG")$FOG
# id[n] is passed unnamed, so data.frame() derives that column's name from
# the expression (it prints as `id.n.`).
row <- data.frame(id[n], flesch_score, fog_score)
row
}
# Start from an empty list; it is overwritten by the lapply() result below.
score2_df <- list()
# Each call receives one sentence as `text`, the full id vector as `id`, and
# n = 1:nrow(haiku_df), so text[n] indexes past the single sentence.
score2_df <- lapply(haiku_df$sentences, scores2, haiku_df$id, n = 1:nrow(haiku_df))
> score2_df
[[1]]
id.n. flesch_score fleschkincaid_score
1 1 62.79 5.246667
2 2 NA NA
3 3 NA NA
[[2]]
id.n. flesch_score fleschkincaid_score
1 1 102.045 0.5166667
2 2 NA NA
3 3 NA NA
[[3]]
id.n. flesch_score fleschkincaid_score
1 1 62.79 5.246667
2 2 NA NA
3 3 NA NA
可靠的 for 循环让我得到了想要的结果,但在数据规模变大时显然会比较慢。
# Collect one result per row of haiku_df, pairing each sentence with its id.
# The list is preallocated and seq_len() is used so that a data frame with
# zero rows yields an empty list instead of looping over c(1, 0).
score3_df <- vector("list", nrow(haiku_df))
for (i in seq_len(nrow(haiku_df))) {
  score3_df[[i]] <- scores(haiku_df$sentences[i], haiku_df$id[i])
}
> dplyr::bind_rows(score3_df)
id flesch_score fog_score
1 1 62.790 1.2
2 2 102.045 2.4
3 3 62.790 1.2
感觉我忽略了一些非常简单的事情,但似乎无法弄清楚。 谢谢!
这里需要用 Map 而不是 lapply,因为函数的输入是 'sentences' 的每个元素及其对应的 'id'。
# Map() calls scores() on each sentence together with its matching id;
# do.call(rbind, ...) then stacks the per-sentence data frames into one.
do.call(rbind, Map(scores, haiku_df$sentences, haiku_df$id))
#. id flesch_score fog_score
#Mapping a function 1 62.790 1.2
#Can sometimes leave me with more 2 102.045 2.4
#Questions than answers 3 62.790 1.2
也可以写成
# Alternative spelling of the Map() call: haiku_df[2:1] puts the columns in
# the order (sentences, id), and unname() drops the column names so they are
# matched to the arguments of scores() by position.
do.call(rbind, do.call(Map, c(f = scores, unname(haiku_df[2:1]))))
或使用tidyverse
library(dplyr)
library(purrr)
library(tidyr)
# Apply scores() to each (sentence, id) pair, then expand the resulting
# list-column of data frames into regular columns.
haiku_df %>%
  transmute(out = map2(sentences, id, scores)) %>%
  unnest(cols = out)
-输出
# A tibble: 3 x 3
# id flesch_score fog_score
# <dbl> <dbl> <dbl>
#1 1 62.8 1.2
#2 2 102. 2.4
#3 3 62.8 1.2
或使用rowwise
# Evaluate scores() one row at a time; the unnamed data-frame result is
# spread into its own columns, and ungroup() drops the row-wise grouping.
haiku_df %>%
  rowwise() %>%
  transmute(scores(sentences, id)) %>%
  ungroup()
# A tibble: 3 x 3
# id flesch_score fog_score
# <dbl> <dbl> <dbl>
#1 1 62.8 1.2
#2 2 102. 2.4
#3 3 62.8 1.2