是否可以向该主题模型脚本中添加一些代码,以使未受过 R 培训的同事能够轻松使用它?
Would it be possible to add some code to this topic model script so that colleagues without R training can use it easily?
我有一个主题模型脚本,我想将其作为 Shiny 应用程序提供
我没有任何使用 Shiny 的经验,这将是我的第一次尝试。输入将始终是格式相同的 Excel 工作表,我希望输出是包含主题模型主题和子主题的 Excel 工作表。该脚本目前运行良好,但我希望在我转到其他团队之后它仍然可以被使用。
# Topic-model pipeline: read feedback text from a CSV file, clean it, build a
# document-term matrix, fit a Gibbs-sampled LDA model and write three CSV
# reports (document -> topic, topic -> top terms, per-document topic
# probabilities) into the working directory.

# Packages that provide the functions called below.
library(stringr)       # str_replace_all()
library(tm)            # stemDocument(), DocumentTermMatrix(), stopwords(), ...
library(topicmodels)   # LDA(), topics(), terms()
library(wordcloud)     # wordcloud()
library(RColorBrewer)  # brewer.pal()

# ---- Load data ----
# The input file must contain a column named `Text`.
# The data frame is deliberately not called `F`: that name masks the `F`
# shorthand for FALSE, so a later `stemming = F` would pass the data frame.
feedback <- read.csv("SourceandText2.csv", stringsAsFactors = FALSE)
feedback$charsinfeedback <- nchar(feedback$Text)
# The backslash must be doubled: "\s" is not a valid escape in an R string.
feedback$wordsinfeedback <- lengths(strsplit(feedback$Text, "\\s+"))
head(feedback$charsinfeedback)

# ---- Clean the text ----
# NOTE(review): stemming runs before punctuation removal and lower-casing;
# the order is kept as in the original script - confirm it is intended.
F_stem <- str_replace_all(feedback$Text, "@", "")
F_stem <- stemDocument(F_stem)
F_stem <- removePunctuation(F_stem)
F_stem <- tolower(F_stem)
F_stem <- stripWhitespace(F_stem)
feedback <- cbind(feedback, F_stem)

# ---- Stop words ----
# Extra tokens to drop in addition to the standard English stop words.
# "–\n" is the same value as the original literal, which contained a line
# break inside the quotes.
extendedstopwords <- c(
  "a", "amp", "hark", "day", "via", "harkiv", "music", "â€", "–", "–\n",
  "it’s", "â·â", "don’t"
)
# Also add apostrophe-free variants ("its", "dont"). The entries above use the
# curly apostrophe (U+2019), so both the straight and curly forms are matched;
# matching only "'" finds nothing.
apostrophe <- "['\u2019]"
extendedstopwords <- c(
  extendedstopwords,
  gsub(apostrophe, "", grep(apostrophe, extendedstopwords, value = TRUE))
)

# ---- Document-term matrix ----
dtm.control <- list(
  tolower = TRUE,
  removePunctuation = TRUE,
  removeNumbers = TRUE,
  # The extended list is now actually applied; it was built but never used.
  stopwords = c(stopwords("english"), extendedstopwords),
  stemming = FALSE,          # text was already stemmed above
  wordLengths = c(3, Inf),   # keep terms of at least 3 characters
  weighting = weightTf
)
dtm <- DocumentTermMatrix(Corpus(VectorSource(F_stem)), control = dtm.control)
dim(dtm)
dtm <- removeSparseTerms(dtm, 0.999)
dim(dtm)

# ---- Term frequencies (exploratory output) ----
freq <- colSums(as.matrix(dtm))
length(freq)
ord <- order(freq, decreasing = TRUE)
freq[head(ord)]   # most frequent terms
freq[tail(ord)]   # least frequent terms
findFreqTerms(dtm, lowfreq = 50)
set.seed(42)      # fixed seed passed to R's RNG before drawing the word cloud
wordcloud(names(freq), freq, min.freq = 10, colors = brewer.pal(6, "Dark2"))

# ---- LDA settings ----
burnin <- 4000
iter <- 2000
thin <- 500
seed <- list(2003, 5, 63, 10001, 765)  # one seed per start (nstart = 5)
nstart <- 5
best <- TRUE
K <- 10                                # number of topics

# ---- Drop documents left with no terms ----
rowTotals <- apply(dtm, 1, sum)
empty.rows <- Docs(dtm)[rowTotals == 0]
corpus3 <- F_stem[as.numeric(empty.rows)]  # cleaned text of the dropped documents
dtm <- dtm[rowTotals > 0, ]
dim(dtm)
if (nrow(dtm) == 0) {
  stop("No document contains any term after cleaning; cannot fit the model.",
       call. = FALSE)
}

# ---- Fit the model ----
ldaOut3 <- LDA(
  dtm, K,
  method = "Gibbs",
  control = list(
    nstart = nstart, seed = seed, best = best,
    burnin = burnin, iter = iter, thin = thin
  )
)

# ---- Write results ----
# paste() joins with spaces, so the files are named e.g.
# "LDAGibbs 10 K3DocsToTopics.csv".
# Document -> most likely topic. topics() is used here; terms() would write
# only the top term of each topic, not the document assignment.
ldaOut3.topics <- as.matrix(topics(ldaOut3))
write.csv(ldaOut3.topics, file = paste("LDAGibbs", K, "K3DocsToTopics.csv"))
# Top 10 terms of every topic.
ldaOut3.terms <- as.matrix(terms(ldaOut3, 10))
write.csv(ldaOut3.terms, file = paste("LDAGibbs", K, "TopicsToTerms.csv"))
# Per-document topic probabilities; rows carry the document ids so they can be
# matched back to the input even though empty documents were dropped.
topicProbabilities3 <- as.data.frame(ldaOut3@gamma,
                                     row.names = ldaOut3@documents)
write.csv(topicProbabilities3,
          file = paste("LDAGibbs", K, "TopicProbabilities.csv"))
是的,你可以把它写成一个 Shiny 应用程序。使用 fileInput()
函数允许用户上传 Excel 文件,使用 downloadButton()
允许用户在最后下载输出文件。你应该先尝试将你的代码转换为 Shiny 应用,如果遇到问题,请提出具体问题。
我有一个主题模型脚本,我想将其作为 Shiny 应用程序提供
我没有任何使用 Shiny 的经验,这将是我的第一次尝试。输入将始终是格式相同的 Excel 工作表,我希望输出是包含主题模型主题和子主题的 Excel 工作表。该脚本目前运行良好,但我希望在我转到其他团队之后它仍然可以被使用。
# Topic-model pipeline: read feedback text from a CSV file, clean it, build a
# document-term matrix, fit a Gibbs-sampled LDA model and write three CSV
# reports (document -> topic, topic -> top terms, per-document topic
# probabilities) into the working directory.

# Packages that provide the functions called below.
library(stringr)       # str_replace_all()
library(tm)            # stemDocument(), DocumentTermMatrix(), stopwords(), ...
library(topicmodels)   # LDA(), topics(), terms()
library(wordcloud)     # wordcloud()
library(RColorBrewer)  # brewer.pal()

# ---- Load data ----
# The input file must contain a column named `Text`.
# The data frame is deliberately not called `F`: that name masks the `F`
# shorthand for FALSE, so a later `stemming = F` would pass the data frame.
feedback <- read.csv("SourceandText2.csv", stringsAsFactors = FALSE)
feedback$charsinfeedback <- nchar(feedback$Text)
# The backslash must be doubled: "\s" is not a valid escape in an R string.
feedback$wordsinfeedback <- lengths(strsplit(feedback$Text, "\\s+"))
head(feedback$charsinfeedback)

# ---- Clean the text ----
# NOTE(review): stemming runs before punctuation removal and lower-casing;
# the order is kept as in the original script - confirm it is intended.
F_stem <- str_replace_all(feedback$Text, "@", "")
F_stem <- stemDocument(F_stem)
F_stem <- removePunctuation(F_stem)
F_stem <- tolower(F_stem)
F_stem <- stripWhitespace(F_stem)
feedback <- cbind(feedback, F_stem)

# ---- Stop words ----
# Extra tokens to drop in addition to the standard English stop words.
# "–\n" is the same value as the original literal, which contained a line
# break inside the quotes.
extendedstopwords <- c(
  "a", "amp", "hark", "day", "via", "harkiv", "music", "â€", "–", "–\n",
  "it’s", "â·â", "don’t"
)
# Also add apostrophe-free variants ("its", "dont"). The entries above use the
# curly apostrophe (U+2019), so both the straight and curly forms are matched;
# matching only "'" finds nothing.
apostrophe <- "['\u2019]"
extendedstopwords <- c(
  extendedstopwords,
  gsub(apostrophe, "", grep(apostrophe, extendedstopwords, value = TRUE))
)

# ---- Document-term matrix ----
dtm.control <- list(
  tolower = TRUE,
  removePunctuation = TRUE,
  removeNumbers = TRUE,
  # The extended list is now actually applied; it was built but never used.
  stopwords = c(stopwords("english"), extendedstopwords),
  stemming = FALSE,          # text was already stemmed above
  wordLengths = c(3, Inf),   # keep terms of at least 3 characters
  weighting = weightTf
)
dtm <- DocumentTermMatrix(Corpus(VectorSource(F_stem)), control = dtm.control)
dim(dtm)
dtm <- removeSparseTerms(dtm, 0.999)
dim(dtm)

# ---- Term frequencies (exploratory output) ----
freq <- colSums(as.matrix(dtm))
length(freq)
ord <- order(freq, decreasing = TRUE)
freq[head(ord)]   # most frequent terms
freq[tail(ord)]   # least frequent terms
findFreqTerms(dtm, lowfreq = 50)
set.seed(42)      # fixed seed passed to R's RNG before drawing the word cloud
wordcloud(names(freq), freq, min.freq = 10, colors = brewer.pal(6, "Dark2"))

# ---- LDA settings ----
burnin <- 4000
iter <- 2000
thin <- 500
seed <- list(2003, 5, 63, 10001, 765)  # one seed per start (nstart = 5)
nstart <- 5
best <- TRUE
K <- 10                                # number of topics

# ---- Drop documents left with no terms ----
rowTotals <- apply(dtm, 1, sum)
empty.rows <- Docs(dtm)[rowTotals == 0]
corpus3 <- F_stem[as.numeric(empty.rows)]  # cleaned text of the dropped documents
dtm <- dtm[rowTotals > 0, ]
dim(dtm)
if (nrow(dtm) == 0) {
  stop("No document contains any term after cleaning; cannot fit the model.",
       call. = FALSE)
}

# ---- Fit the model ----
ldaOut3 <- LDA(
  dtm, K,
  method = "Gibbs",
  control = list(
    nstart = nstart, seed = seed, best = best,
    burnin = burnin, iter = iter, thin = thin
  )
)

# ---- Write results ----
# paste() joins with spaces, so the files are named e.g.
# "LDAGibbs 10 K3DocsToTopics.csv".
# Document -> most likely topic. topics() is used here; terms() would write
# only the top term of each topic, not the document assignment.
ldaOut3.topics <- as.matrix(topics(ldaOut3))
write.csv(ldaOut3.topics, file = paste("LDAGibbs", K, "K3DocsToTopics.csv"))
# Top 10 terms of every topic.
ldaOut3.terms <- as.matrix(terms(ldaOut3, 10))
write.csv(ldaOut3.terms, file = paste("LDAGibbs", K, "TopicsToTerms.csv"))
# Per-document topic probabilities; rows carry the document ids so they can be
# matched back to the input even though empty documents were dropped.
topicProbabilities3 <- as.data.frame(ldaOut3@gamma,
                                     row.names = ldaOut3@documents)
write.csv(topicProbabilities3,
          file = paste("LDAGibbs", K, "TopicProbabilities.csv"))
是的,你可以把它写成一个 Shiny 应用程序。使用 fileInput()
函数允许用户上传 Excel 文件,使用 downloadButton()
允许用户在最后下载输出文件。你应该先尝试将你的代码转换为 Shiny 应用,如果遇到问题,请提出具体问题。