在 R 中使用 grep 和 paste 标记文本
Tag text using grep and paste in R
我有两个数据框。第一个:
# Keyword table: a single column named `keyword`, one row per search term.
keyword <- data.frame(
  keyword = c("apple", "peach", "grape", "berry", "kiwi fruit")
)
第二个:
# Sentence table: each sentence paired with the URL it came from.
# `url` is also kept as a standalone vector in the workspace.
url <- c("url1", "url2", "url3")
sentence <- data.frame(
  sentence = c("I like apple", "I hate apple", "grape is good"),
  url = url
)
我需要做的是:如果某个句子中包含关键字,就把该句子对应的 url 粘贴到该关键字所在的行。如果有多个句子包含同一个关键字,则把这些句子的 url 全部粘贴进去。期望的最终结果是这样的:
我尝试使用下面的代码,但没有按预期运行。
# Asker's attempt, kept verbatim (reported as not behaving as expected).
# Start with an empty-string Label for every keyword row.
keyword$Label <- character(length(keyword$keyword))
for (i in 1:length(keyword$keyword)) {
# grep() returns the positions of the matching *sentences*, but those
# positions are used here to index keyword$Label, which has one entry per
# keyword. `i` only selects the pattern; it does not choose which Label is
# written. The right-hand side is the whole url column, not just the urls of
# the matching sentences.
keyword$Label[grep(keyword$keyword[i],sentence$sentence)] <- sentence$url
}
使用 stringr + dplyr + tidyr 的解决方案:
library(stringr)
library(dplyr)
library(tidyr)
# Replace each sentence with the first keyword it contains, join onto the
# full keyword list so unmatched keywords are kept, number the URLs within
# each keyword, and spread them into URL1, URL2, ... columns.
sentence %>%
  mutate(
    sentence = str_extract(sentence, paste(keyword$keyword, collapse = "|"))
  ) %>%
  right_join(keyword, by = c(sentence = "keyword")) %>%
  group_by(sentence) %>%
  mutate(URL = 1:n()) %>%
  spread(key = URL, value = url, sep = "") %>%
  rename(keyword = sentence)
结果:
# A tibble: 5 x 3
# Groups: keyword [5]
keyword URL1 URL2
* <chr> <chr> <chr>
1 apple url1 url2
2 berry <NA> <NA>
3 grape url3 <NA>
4 kiwi fruit <NA> <NA>
5 peach <NA> <NA>
数据:
# Reproducible input for the solution; text columns are kept as character
# vectors rather than factors.
keyword <- data.frame(
  keyword = c("apple", "peach", "grape", "berry", "kiwi fruit"),
  stringsAsFactors = FALSE
)
url <- c("url1", "url2", "url3")
sentence <- data.frame(
  sentence = c("I like apple", "I hate apple", "grape is good"),
  url = url,
  stringsAsFactors = FALSE
)
我有两个数据框。第一个:
# Keyword table: a single column named `keyword`, one row per search term.
keyword <- data.frame(
  keyword = c("apple", "peach", "grape", "berry", "kiwi fruit")
)
第二个:
# Sentence table: each sentence paired with the URL it came from.
# `url` is also kept as a standalone vector in the workspace.
url <- c("url1", "url2", "url3")
sentence <- data.frame(
  sentence = c("I like apple", "I hate apple", "grape is good"),
  url = url
)
我需要做的是:如果某个句子中包含关键字,就把该句子对应的 url 粘贴到该关键字所在的行。如果有多个句子包含同一个关键字,则把这些句子的 url 全部粘贴进去。期望的最终结果是这样的:
我尝试使用下面的代码,但没有按预期运行。
# Asker's attempt, kept verbatim (reported as not behaving as expected).
# Start with an empty-string Label for every keyword row.
keyword$Label <- character(length(keyword$keyword))
for (i in 1:length(keyword$keyword)) {
# grep() returns the positions of the matching *sentences*, but those
# positions are used here to index keyword$Label, which has one entry per
# keyword. `i` only selects the pattern; it does not choose which Label is
# written. The right-hand side is the whole url column, not just the urls of
# the matching sentences.
keyword$Label[grep(keyword$keyword[i],sentence$sentence)] <- sentence$url
}
使用 stringr + dplyr + tidyr 的解决方案:
library(stringr)
library(dplyr)
library(tidyr)
# Replace each sentence with the first keyword it contains, join onto the
# full keyword list so unmatched keywords are kept, number the URLs within
# each keyword, and spread them into URL1, URL2, ... columns.
sentence %>%
  mutate(
    sentence = str_extract(sentence, paste(keyword$keyword, collapse = "|"))
  ) %>%
  right_join(keyword, by = c(sentence = "keyword")) %>%
  group_by(sentence) %>%
  mutate(URL = 1:n()) %>%
  spread(key = URL, value = url, sep = "") %>%
  rename(keyword = sentence)
结果:
# A tibble: 5 x 3
# Groups: keyword [5]
keyword URL1 URL2
* <chr> <chr> <chr>
1 apple url1 url2
2 berry <NA> <NA>
3 grape url3 <NA>
4 kiwi fruit <NA> <NA>
5 peach <NA> <NA>
数据:
# Reproducible input for the solution; text columns are kept as character
# vectors rather than factors.
keyword <- data.frame(
  keyword = c("apple", "peach", "grape", "berry", "kiwi fruit"),
  stringsAsFactors = FALSE
)
url <- c("url1", "url2", "url3")
sentence <- data.frame(
  sentence = c("I like apple", "I hate apple", "grape is good"),
  url = url,
  stringsAsFactors = FALSE
)