使用已检查的元素在 Shiny Application 中训练机器学习算法
Use checked elements for training a machine learning algorithm in a Shiny Application
出于训练目的,我想制作一个 Shiny 应用程序,您可以在其中检查列名并将其用于训练随机森林算法。
我的 Shiny 应用程序如下所示:
library(shiny)
library(DT)
library(titanic)
library(randomForest)
ui <- fluidPage(
DT::dataTableOutput("mytable"),
checkboxInput("checkbox" , label = "Pclass", value = FALSE),
checkboxInput("checkbox" , label = "Sex", value = FALSE),
checkboxInput("checkbox" , label = "Age", value = FALSE),
actionButton("runRF", "Predict"),
plotOutput("plotRF")
)
server <- function(input, output, session) {
output$mytable = DT::renderDataTable({
titanic_train
})
observeEvent(input$runRF, {
var = c("Pclass")
fit <- randomForest(as.factor(Survived) ~ var, data = titanic_train, importance = TRUE, ntree=2000)
prediction <- as.numeric(predict(fit, titanic_test))
titanic_test$predicted <- prediction
output$plotRF <- renderPlot({
hist(prediction)
})
})
}
shinyApp(ui, server)
基本上上面的代码在我做类似的事情时有效:
fit <- randomForest(as.factor(Survived) ~ Age, data = titanic_train, importance = TRUE, ntree=2000)
或
fit <- randomForest(as.factor(Survived) ~ Pclass + Age, data = titanic_train, importance = TRUE, ntree=2000)
但是我想让训练变量依赖于你选中的框。所以如果你检查 Age + Pclass 它应该是:
fit <- randomForest(as.factor(Survived) ~ Pclass + Age, data = titanic_train, importance = TRUE, ntree=2000)
如果您检查年龄:
fit <- randomForest(as.factor(Survived) ~ Age, data = titanic_train, importance = TRUE, ntree=2000)
我假设我必须制作一个列表,我在其中存储 "checked values",例如:
var = c(checkElement1)
但是这给了我以下错误:
Warning: Error in model.frame.default: variable lengths differ (found for 'var')
Stack trace (innermost first):
74: model.frame.default
73: model.frame
72: eval
71: eval
70: randomForest.formula
69: randomForest
68: observeEventHandler [#11]
4: <Anonymous>
3: do.call
2: print.shiny.appobj
1: <Promise>
有什么地方出了问题吗?
这是一个可能的解决方案。与其创建公式,不如传递 x
和 y
的值可能更容易,因此我们可以将 randomForest
称为:
randomForest(x = titanic_train[,input$myselection,drop=FALSE],
y = as.factor(titanic_train$Survived),
importance = TRUE,
ntree=2000)
其中 drop=FALSE
确保我们在只有一列 selected 时仍然传递 data.frame
而不是向量,并且 input$myselection
是 select编列。您可以从单独的 checkBox
元素构建它,但我创建了一个 checkboxGroupInput
,其中包含用户应该能够 select.
的所有可能的列
此外,从观察者内部创建反应或输出是不好的做法。参见 this slide and the two after it from a presentation by Joe Cheng。在这种情况下,我们可以将我们的预测与测试数据集一起存储在一个名为 my_prediction
的 reactiveVal 中,我们可以将其用于我们的绘图和其他统计数据。
我在下面的代码中添加了一些简单的预处理,例如字符列应该成为因子,在这种情况下,某些列的因子值太多,因此我删除了这些列。但我假设此数据集仅用于说明目的,这不是问题,因为您已经修改了真实数据集以使其正常工作。我有点强迫症,无法举出使用时 returns 错误的例子:)
希望对您有所帮助!
library(shiny)
library(DT)
library(titanic)
library(randomForest)
# Replace NA's and replace String with Factor columns
# There may be nicer ways to do this though.
titanic_train[is.na(titanic_train)] <- 0
titanic_test[is.na(titanic_test)] <- 0
titanic_train[sapply(titanic_train, is.character)] <- lapply(titanic_train[sapply(titanic_train, is.character)],
as.factor)
titanic_test[sapply(titanic_test, is.character)] <- lapply(titanic_test[sapply(titanic_test, is.character)],
as.factor)
# drop columns with too many factor levels
to_drop=sapply(colnames(titanic_train)[sapply(titanic_train,class)=='factor'],function(x) {length(levels(titanic_train[,x]))>52})
if(sum(to_drop)>0){
titanic_train <- titanic_train[,-which(names(titanic_train) %in% names(to_drop)[to_drop])]
titanic_test <- titanic_test[,-which(names(titanic_test) %in% names(to_drop)[to_drop])]
}
ui <- fluidPage(
DT::dataTableOutput("mytable"),
checkboxGroupInput('myselection','Select columns:',
choices=setdiff(colnames(titanic_train),c('PassengerId','Survived','Name')),
inline=T),
actionButton("runRF", "Predict"),
plotOutput("plotRF")
)
server <- function(input, output, session) {
output$mytable = DT::renderDataTable({
titanic_train
})
observeEvent(input$runRF, {
if(is.null(input$myselection))
{
my_prediction(NULL)
showModal(modalDialog(
title = "Error!",
"No variables selected!"
))
}
else
{
fit <- randomForest(x = titanic_train[,input$myselection,drop=FALSE],
y = as.factor(titanic_train$Survived),
importance = TRUE,
ntree=2000)
prediction <- as.numeric(predict(fit, titanic_test[,input$myselection,drop=FALSE]))
titanic_test$predicted <- prediction
my_prediction(titanic_test) # store our test set with predicted valus in reactiveVal
}
})
# A reactiveVal to store titanic_test with its predictions.
my_prediction <- reactiveVal()
output$plotRF <- renderPlot({
req(my_prediction())
hist(my_prediction()$predicted)
})
}
shinyApp(ui, server)
我们需要使用 paste
创建一个 formula
var = "Pclass"
form <- formula(paste('as.factor(Survived)', var, sep=' ~ '))
fit <- randomForest(form, data = titanic_train, importance = TRUE, ntree=2000)
-完整代码
library(shiny)
library(DT)
library(titanic)
library(randomForest)
ui <- fluidPage(
DT::dataTableOutput("mytable"),
checkboxInput("checkbox" , label = "Pclass", value = FALSE),
checkboxInput("checkbox" , label = "Sex", value = FALSE),
checkboxInput("checkbox" , label = "Age", value = FALSE),
actionButton("runRF", "Predict"),
plotOutput("plotRF")
)
server <- function(input, output, session) {
output$mytable = DT::renderDataTable({
titanic_train
})
observeEvent(input$runRF, {
var = "Pclass"
form <- formula(paste('as.factor(Survived)', var, sep=' ~ '))
fit <- randomForest(form, data = titanic_train, importance = TRUE, ntree=2000)
prediction <- as.numeric(predict(fit, titanic_test))
titanic_test$predicted <- prediction
output$plotRF <- renderPlot({
hist(prediction)
})
})
}
shinyApp(ui, server)
-输出
出于训练目的,我想制作一个 Shiny 应用程序,您可以在其中检查列名并将其用于训练随机森林算法。
我的 Shiny 应用程序如下所示:
library(shiny)
library(DT)
library(titanic)
library(randomForest)
ui <- fluidPage(
DT::dataTableOutput("mytable"),
checkboxInput("checkbox" , label = "Pclass", value = FALSE),
checkboxInput("checkbox" , label = "Sex", value = FALSE),
checkboxInput("checkbox" , label = "Age", value = FALSE),
actionButton("runRF", "Predict"),
plotOutput("plotRF")
)
server <- function(input, output, session) {
output$mytable = DT::renderDataTable({
titanic_train
})
observeEvent(input$runRF, {
var = c("Pclass")
fit <- randomForest(as.factor(Survived) ~ var, data = titanic_train, importance = TRUE, ntree=2000)
prediction <- as.numeric(predict(fit, titanic_test))
titanic_test$predicted <- prediction
output$plotRF <- renderPlot({
hist(prediction)
})
})
}
shinyApp(ui, server)
基本上上面的代码在我做类似的事情时有效:
fit <- randomForest(as.factor(Survived) ~ Age, data = titanic_train, importance = TRUE, ntree=2000)
或
fit <- randomForest(as.factor(Survived) ~ Pclass + Age, data = titanic_train, importance = TRUE, ntree=2000)
但是我想让训练变量依赖于你选中的框。所以如果你检查 Age + Pclass 它应该是:
fit <- randomForest(as.factor(Survived) ~ Pclass + Age, data = titanic_train, importance = TRUE, ntree=2000)
如果您检查年龄:
fit <- randomForest(as.factor(Survived) ~ Age, data = titanic_train, importance = TRUE, ntree=2000)
我假设我必须制作一个列表,我在其中存储 "checked values",例如:
var = c(checkElement1)
但是这给了我以下错误:
Warning: Error in model.frame.default: variable lengths differ (found for 'var')
Stack trace (innermost first):
74: model.frame.default
73: model.frame
72: eval
71: eval
70: randomForest.formula
69: randomForest
68: observeEventHandler [#11]
4: <Anonymous>
3: do.call
2: print.shiny.appobj
1: <Promise>
有什么地方出了问题吗?
这是一个可能的解决方案。与其创建公式,不如传递 x
和 y
的值可能更容易,因此我们可以将 randomForest
称为:
randomForest(x = titanic_train[,input$myselection,drop=FALSE],
y = as.factor(titanic_train$Survived),
importance = TRUE,
ntree=2000)
其中 drop=FALSE
确保我们在只有一列 selected 时仍然传递 data.frame
而不是向量,并且 input$myselection
是 select编列。您可以从单独的 checkBox
元素构建它,但我创建了一个 checkboxGroupInput
,其中包含用户应该能够 select.
此外,从观察者内部创建反应或输出是不好的做法。参见 this slide and the two after it from a presentation by Joe Cheng。在这种情况下,我们可以将我们的预测与测试数据集一起存储在一个名为 my_prediction
的 reactiveVal 中,我们可以将其用于我们的绘图和其他统计数据。
我在下面的代码中添加了一些简单的预处理,例如字符列应该成为因子,在这种情况下,某些列的因子值太多,因此我删除了这些列。但我假设此数据集仅用于说明目的,这不是问题,因为您已经修改了真实数据集以使其正常工作。我有点强迫症,无法举出使用时 returns 错误的例子:)
希望对您有所帮助!
library(shiny)
library(DT)
library(titanic)
library(randomForest)
# Replace NA's and replace String with Factor columns
# There may be nicer ways to do this though.
titanic_train[is.na(titanic_train)] <- 0
titanic_test[is.na(titanic_test)] <- 0
titanic_train[sapply(titanic_train, is.character)] <- lapply(titanic_train[sapply(titanic_train, is.character)],
as.factor)
titanic_test[sapply(titanic_test, is.character)] <- lapply(titanic_test[sapply(titanic_test, is.character)],
as.factor)
# drop columns with too many factor levels
to_drop=sapply(colnames(titanic_train)[sapply(titanic_train,class)=='factor'],function(x) {length(levels(titanic_train[,x]))>52})
if(sum(to_drop)>0){
titanic_train <- titanic_train[,-which(names(titanic_train) %in% names(to_drop)[to_drop])]
titanic_test <- titanic_test[,-which(names(titanic_test) %in% names(to_drop)[to_drop])]
}
ui <- fluidPage(
DT::dataTableOutput("mytable"),
checkboxGroupInput('myselection','Select columns:',
choices=setdiff(colnames(titanic_train),c('PassengerId','Survived','Name')),
inline=T),
actionButton("runRF", "Predict"),
plotOutput("plotRF")
)
server <- function(input, output, session) {
output$mytable = DT::renderDataTable({
titanic_train
})
observeEvent(input$runRF, {
if(is.null(input$myselection))
{
my_prediction(NULL)
showModal(modalDialog(
title = "Error!",
"No variables selected!"
))
}
else
{
fit <- randomForest(x = titanic_train[,input$myselection,drop=FALSE],
y = as.factor(titanic_train$Survived),
importance = TRUE,
ntree=2000)
prediction <- as.numeric(predict(fit, titanic_test[,input$myselection,drop=FALSE]))
titanic_test$predicted <- prediction
my_prediction(titanic_test) # store our test set with predicted valus in reactiveVal
}
})
# A reactiveVal to store titanic_test with its predictions.
my_prediction <- reactiveVal()
output$plotRF <- renderPlot({
req(my_prediction())
hist(my_prediction()$predicted)
})
}
shinyApp(ui, server)
我们需要使用 paste
formula
var = "Pclass"
form <- formula(paste('as.factor(Survived)', var, sep=' ~ '))
fit <- randomForest(form, data = titanic_train, importance = TRUE, ntree=2000)
-完整代码
library(shiny)
library(DT)
library(titanic)
library(randomForest)
ui <- fluidPage(
DT::dataTableOutput("mytable"),
checkboxInput("checkbox" , label = "Pclass", value = FALSE),
checkboxInput("checkbox" , label = "Sex", value = FALSE),
checkboxInput("checkbox" , label = "Age", value = FALSE),
actionButton("runRF", "Predict"),
plotOutput("plotRF")
)
server <- function(input, output, session) {
output$mytable = DT::renderDataTable({
titanic_train
})
observeEvent(input$runRF, {
var = "Pclass"
form <- formula(paste('as.factor(Survived)', var, sep=' ~ '))
fit <- randomForest(form, data = titanic_train, importance = TRUE, ntree=2000)
prediction <- as.numeric(predict(fit, titanic_test))
titanic_test$predicted <- prediction
output$plotRF <- renderPlot({
hist(prediction)
})
})
}
shinyApp(ui, server)
-输出