函数在映射时返回所有值而不是一个值

Function is returning all values instead of one when mapped

希望得到一些关于如何映射函数的建议:该函数返回一个句子的可读性分数向量(最终要把所有结果绑定在一起)。我尝试了两种不同的方法,但到目前为止只弄清楚了如何通过 for 循环得到想要的结果。

library(quanteda.textstats)
# Example data: one row per haiku line, keyed by a numeric id.
haiku_df <- data.frame(
  id = c(1, 2, 3),
  sentences = c(
    "Mapping a function",
    "Can sometimes leave me with more",
    "Questions than answers"
  )
)

我原以为这会为列表中的每个元素返回一个分数向量,但每个结果都被重复了 nrow(haiku_df) 次:

#' Compute Flesch and FOG readability scores for text
#'
#' @param text Character vector of texts, passed to `textstat_readability()`.
#' @param id Identifier(s) to attach to the scores.
#' @return A data frame with columns `id`, `flesch_score` and `fog_score`.
#'   The columns are combined with `data.frame()`, so a single text paired
#'   with several ids yields one row per id with the same scores repeated.
scores <- function(text, id) {
  # Request both measures in one call so the text is only processed once.
  readability <- textstat_readability(text, measure = c("Flesch", "FOG"))
  data.frame(
    id,
    flesch_score = readability$Flesch,
    fog_score = readability$FOG
  )
}
# Apply scores() to each sentence in turn; the full id vector is handed
# unchanged to every call.
score_df <- lapply(
  haiku_df$sentences,
  function(sentence) scores(sentence, haiku_df$id)
)
score_df


> score_df
[[1]]
  id flesch_score fog_score
1  1        62.79       1.2
2  2        62.79       1.2
3  3        62.79       1.2

[[2]]
  id flesch_score fog_score
1  1      102.045       2.4
2  2      102.045       2.4
3  3      102.045       2.4

[[3]]
  id flesch_score fog_score
1  1        62.79       1.2
2  2        62.79       1.2
3  3        62.79       1.2

下面这种写法(添加 n 作为参数)方向是对的,但结果仍然不正确:

#' Compute Flesch and FOG readability scores for selected positions
#'
#' @param text Character vector of texts.
#' @param id Vector of identifiers.
#' @param n Index vector used to subset both `text` and `id`.
#' @return A data frame with columns `id.n.`, `flesch_score` and `fog_score`.
scores2 <- function(text, id, n) {
  # Subset once; both measures are computed on the same selection.
  picked <- text[n]
  flesch <- textstat_readability(picked, measure = "Flesch")[["Flesch"]]
  fog <- textstat_readability(picked, measure = "FOG")[["FOG"]]
  # The first column is left unnamed so data.frame() derives `id.n.` from the
  # expression, as before.
  data.frame(id[n], flesch_score = flesch, fog_score = fog)
}
# Each call receives a single sentence as `text`, together with the whole id
# vector and the full index range as `n`.
score2_df <- lapply(
  haiku_df$sentences,
  function(sentence) {
    scores2(sentence, haiku_df$id, n = seq_len(nrow(haiku_df)))
  }
)


> score2_df
[[1]]
  id.n. flesch_score fog_score
1     1        62.79       1.2
2     2           NA        NA
3     3           NA        NA

[[2]]
  id.n. flesch_score fog_score
1     1      102.045       2.4
2     2           NA        NA
3     3           NA        NA

[[3]]
  id.n. flesch_score fog_score
1     1        62.79       1.2
2     2           NA        NA
3     3           NA        NA

老办法 for 循环能得到我想要的结果,但数据量变大时显然会比较慢。

# Score one row at a time. The result list is preallocated and the index comes
# from seq_len(), so a data frame with zero rows skips the loop instead of
# iterating over c(1, 0).
score3_df <- vector("list", nrow(haiku_df))
for (i in seq_len(nrow(haiku_df))) {
  score3_df[[i]] <- scores(haiku_df$sentences[i], haiku_df$id[i])
}

> dplyr::bind_rows(score3_df)
  id flesch_score fog_score
1  1       62.790       1.2
2  2      102.045       2.4
3  3       62.790       1.2

感觉我忽略了一些非常简单的事情,但似乎无法弄清楚。 谢谢!

这里需要用 Map 而不是 lapply:lapply 只遍历 'sentences',并把整个 'id' 向量原样传给每一次调用;Map 则同时遍历 'sentences' 和 'id',把每个句子与其对应的 id 配对后传给函数。

# Map() walks 'sentences' and 'id' in parallel, so every call to scores() gets
# one sentence with its matching id; rbind then stacks the one-row results.
do.call(
  rbind,
  Map(f = scores, haiku_df$sentences, haiku_df$id)
)
#                                 id flesch_score fog_score
#Mapping a function                1       62.790       1.2
#Can sometimes leave me with more  2      102.045       2.4
#Questions than answers            3       62.790       1.2

也可以写成

# Same idea via do.call(): pick the columns in the order scores() expects
# (text first, then id) and drop their names so they are matched by position.
do.call(
  rbind,
  do.call(Map, c(f = scores, unname(haiku_df[c("sentences", "id")])))
)

或使用tidyverse

library(dplyr)
library(purrr)
library(tidyr)
# Score each (sentence, id) pair into a list-column of one-row data frames,
# then expand that column into regular columns.
haiku_df %>%
  transmute(out = map2(.x = sentences, .y = id, .f = scores)) %>%
  unnest(cols = out)

输出:

# A tibble: 3 x 3
#     id flesch_score fog_score
#  <dbl>        <dbl>     <dbl>
#1     1         62.8       1.2
#2     2        102.        2.4
#3     3         62.8       1.2

或使用rowwise

# Evaluate scores() once per row; the unnamed data frame it returns is
# unpacked into columns, and ungroup() drops the rowwise grouping afterwards.
haiku_df %>%
  rowwise() %>%
  transmute(scores(sentences, id)) %>%
  ungroup()
# A tibble: 3 x 3
#     id flesch_score fog_score
#  <dbl>        <dbl>     <dbl>
#1     1         62.8       1.2
#2     2        102.        2.4
#3     3         62.8       1.2