如何让脚本自动替换其中的单词?
Automate script to replace a word?
我正在为单词列表创建比例图。
有没有办法自动执行此脚本,这样我就不必将每个新单词复制并粘贴到适当的位置?
在下面的脚本中,单词是 "research"。
# Proportion of the word "research" per Year group: build the document-feature
# matrix, group it by "Year", weight it with scheme "prop", and keep only the
# feature of interest.
dfm_research <- dfm_select(
  dfm_weight(
    dfm_group(dfm(corpus_toks), groups = "Year"),
    scheme = "prop"
  ),
  pattern = "research"
)
colnames(dfm_research)[1] <- "research"

# Convert to a plain data frame; doc_id is coerced to numeric so it can be
# placed on the continuous x axis below.
dfm_research2 <- convert(dfm_research, to = "data.frame")
dfm_research2[["doc_id"]] <- as.numeric(dfm_research2[["doc_id"]])

# plot
dfm_research2[["research"]] <- round(dfm_research2[["research"]], digits = 5)

# Reshape to long format (columns: doc_id, variable, value) for ggplot.
dfm_research3 <- melt(dfm_research2, id.vars = "doc_id")

# Strongly penalise scientific notation when numbers are formatted.
options(scipen = 999)

researchprop <- ggplot(data = dfm_research3, aes(x = doc_id, y = value)) +
  geom_line(colour = "red") +
  scale_x_continuous(limits = c(1999, 2019), breaks = seq(1999, 2019, 1)) +
  scale_y_continuous(limits = c(0, 0.005), breaks = seq(0, 0.005, 0.001)) +
  labs(title = "Research by proportions") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
我们可以将它包装在一个函数中:
# Plot the per-Year proportion of a single word as a red line chart.
#
# Args:
#   corpus_obj: Object passed directly to dfm(); it is then grouped with
#     dfm_group(groups = "Year"). Callers in this file pass `corpus_toks`.
#   word: A single character string. Used as the dfm_select() pattern, as the
#     name given to the first selected feature column, and (capitalised) in
#     the plot title.
#
# Returns:
#   A ggplot object titled "<Word> by proportions".
#
# Side effects:
#   Sets options(scipen = 999) for the session. It is deliberately not restored
#   on exit because the plot is drawn when it is printed, which happens after
#   this function has returned.
f1 <- function(corpus_obj, word) {
  stopifnot(is.character(word), length(word) == 1L, !is.na(word))

  # Capitalise the word for the title: upper-case everything, keep the first
  # character and lower-case the rest with the PCRE \L operator. Backslashes
  # must be doubled inside an R string literal; the single-backslash form
  # "\1\L\2" does not even parse ('\L' is an unrecognized escape).
  word_new <- sub("^(.)(.*)", "\\1\\L\\2", toupper(word), perl = TRUE)

  dfm_research <- dfm(corpus_obj) %>%
    dfm_group(groups = "Year") %>%
    dfm_weight(scheme = "prop") %>%
    dfm_select(pattern = word)

  # Fail with a clear message instead of an obscure dimnames error when the
  # word does not occur in the document-feature matrix.
  if (ncol(dfm_research) == 0) {
    stop("No feature matching '", word, "' was found.", call. = FALSE)
  }
  colnames(dfm_research)[1] <- word

  dfm_research2 <- convert(dfm_research, to = "data.frame")
  # doc_id must be numeric to sit on the continuous x axis below.
  dfm_research2$doc_id <- as.numeric(dfm_research2$doc_id)

  # plot
  dfm_research2[[word]] <- round(dfm_research2[[word]], digits = 5)
  dfm_research3 <- melt(dfm_research2, id.vars = "doc_id")
  options(scipen = 999)

  ggplot(data = dfm_research3, aes(x = doc_id, y = value)) +
    geom_line(colour = "red") +
    scale_x_continuous(limits = c(1999, 2019), breaks = seq(1999, 2019, 1)) +
    scale_y_continuous(limits = c(0, 0.005), breaks = seq(0, 0.005, 0.001)) +
    labs(title = paste0(word_new, " by proportions")) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
然后调用为
# Draw the proportion plot for the single word "research".
f1(corpus_obj = corpus_toks, word = "research")
如果有多个单词,可以用 lapply 循环:
# Words to plot. Append further terms to this vector as needed; the original
# `....` placeholder was evaluated as an undefined object and raised an error.
word_vector <- c("research", "anotherword")

# Call f1() once per word; the result is a list with one plot per word.
lapply(word_vector, function(x) f1(corpus_toks, x))
我正在为单词列表创建比例图。 有没有办法自动执行此脚本,这样我就不必将每个新单词复制并粘贴到适当的位置?
在下面的脚本中,单词是 "research"。
# Proportion of the word "research" per Year group: build the document-feature
# matrix, group it by "Year", weight it with scheme "prop", and keep only the
# feature of interest.
dfm_research <- dfm_select(
  dfm_weight(
    dfm_group(dfm(corpus_toks), groups = "Year"),
    scheme = "prop"
  ),
  pattern = "research"
)
colnames(dfm_research)[1] <- "research"

# Convert to a plain data frame; doc_id is coerced to numeric so it can be
# placed on the continuous x axis below.
dfm_research2 <- convert(dfm_research, to = "data.frame")
dfm_research2[["doc_id"]] <- as.numeric(dfm_research2[["doc_id"]])

# plot
dfm_research2[["research"]] <- round(dfm_research2[["research"]], digits = 5)

# Reshape to long format (columns: doc_id, variable, value) for ggplot.
dfm_research3 <- melt(dfm_research2, id.vars = "doc_id")

# Strongly penalise scientific notation when numbers are formatted.
options(scipen = 999)

researchprop <- ggplot(data = dfm_research3, aes(x = doc_id, y = value)) +
  geom_line(colour = "red") +
  scale_x_continuous(limits = c(1999, 2019), breaks = seq(1999, 2019, 1)) +
  scale_y_continuous(limits = c(0, 0.005), breaks = seq(0, 0.005, 0.001)) +
  labs(title = "Research by proportions") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
我们可以将它包装在一个函数中:
# Plot the per-Year proportion of a single word as a red line chart.
#
# Args:
#   corpus_obj: Object passed directly to dfm(); it is then grouped with
#     dfm_group(groups = "Year"). Callers in this file pass `corpus_toks`.
#   word: A single character string. Used as the dfm_select() pattern, as the
#     name given to the first selected feature column, and (capitalised) in
#     the plot title.
#
# Returns:
#   A ggplot object titled "<Word> by proportions".
#
# Side effects:
#   Sets options(scipen = 999) for the session. It is deliberately not restored
#   on exit because the plot is drawn when it is printed, which happens after
#   this function has returned.
f1 <- function(corpus_obj, word) {
  stopifnot(is.character(word), length(word) == 1L, !is.na(word))

  # Capitalise the word for the title: upper-case everything, keep the first
  # character and lower-case the rest with the PCRE \L operator. Backslashes
  # must be doubled inside an R string literal; the single-backslash form
  # "\1\L\2" does not even parse ('\L' is an unrecognized escape).
  word_new <- sub("^(.)(.*)", "\\1\\L\\2", toupper(word), perl = TRUE)

  dfm_research <- dfm(corpus_obj) %>%
    dfm_group(groups = "Year") %>%
    dfm_weight(scheme = "prop") %>%
    dfm_select(pattern = word)

  # Fail with a clear message instead of an obscure dimnames error when the
  # word does not occur in the document-feature matrix.
  if (ncol(dfm_research) == 0) {
    stop("No feature matching '", word, "' was found.", call. = FALSE)
  }
  colnames(dfm_research)[1] <- word

  dfm_research2 <- convert(dfm_research, to = "data.frame")
  # doc_id must be numeric to sit on the continuous x axis below.
  dfm_research2$doc_id <- as.numeric(dfm_research2$doc_id)

  # plot
  dfm_research2[[word]] <- round(dfm_research2[[word]], digits = 5)
  dfm_research3 <- melt(dfm_research2, id.vars = "doc_id")
  options(scipen = 999)

  ggplot(data = dfm_research3, aes(x = doc_id, y = value)) +
    geom_line(colour = "red") +
    scale_x_continuous(limits = c(1999, 2019), breaks = seq(1999, 2019, 1)) +
    scale_y_continuous(limits = c(0, 0.005), breaks = seq(0, 0.005, 0.001)) +
    labs(title = paste0(word_new, " by proportions")) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
然后调用为
# Draw the proportion plot for the single word "research".
f1(corpus_obj = corpus_toks, word = "research")
如果有多个单词,可以用 lapply 循环:
# Words to plot. Append further terms to this vector as needed; the original
# `....` placeholder was evaluated as an undefined object and raised an error.
word_vector <- c("research", "anotherword")

# Call f1() once per word; the result is a list with one plot per word.
lapply(word_vector, function(x) f1(corpus_toks, x))