使用 for 循环绘制模型集合中的变量重要性
Plotting variable importance from ensemble of models with for loop
我在尝试从模型集合中绘制变量重要性时一直 运行 出错。
我有我已经拟合的模型集合,现在我正在尝试为我已经拟合的每个算法创建多个变量重要性图。我正在使用插入符号中的 varImp()
函数来提取变量重要性,然后 plot()
它。为了适合模型的集合,我正在使用 caretEnsemble
包。
感谢您的帮助,请参阅下面的代码示例。
# Caret ensemble is needed to produce list of models
library(caret)
library(caretEnsemble)
# Set algorithms I wish to fit
my_algorithms <- c("glmnet", "svmRadial", "rf", "nnet", "knn", "rpart")
# Define controls
my_controls <- trainControl(
method = "cv",
savePredictions = "final",
number = 3
)
# Run the models all at once with caretEnsemble
my_list_of_models <- caretEnsemble::caretList(Species ~ .,
data = iris,
trControl = my_controls,
methodList = my_algorithms)
# Subset models
list_of_algorithms <- my_list_of_models[my_algorithms]
# Create first for loop to extract variable importance via caret::varImp()
importance <- list()
for (algo in seq_along(list_of_algorithms)) {
importance[[algo]] <- varImp(list_of_algorithms[[algo]])
}
# Create second loop to go over extracted importance and plot it using plot()
importance_plots <- list()
for (imp in seq_along(importance)) {
importance_plots[[imp]] <- plot(importance[[imp]])
}
# Error occurs during the second for loop:
Error in data.frame(values = unlist(unname(x)), ind, stringsAsFactors = FALSE):arguments imply differing number of rows: 16,
我想出了解决上述问题的方法,并决定post将其作为我自己的答案。我编写了一个小函数来绘制变量重要性,而不依赖 caret
辅助函数来创建绘图。我使用 dotplot
和 levelplot
因为 caret
returns data.frame
根据提供的算法不同。它可能不适用于不适合的不同算法和模型。
# Libraries ---------------------------------------------------------------
library(caret) # To train ML algorithms
library(dplyr) # Required for %>% operators in custom function below
library(caretEnsemble) # To train multiple caret models
library(lattice) # Required for plotting, should be loaded alongside caret
library(gridExtra) # Required for plotting multiple plots
# Custom function ---------------------------------------------------------
# The function requires list of models as input and is used in for loop
plot_importance <- function(importance_list, imp, algo_names) {
importance <- importance_list[[imp]]$importance
model_title <- algo_names[[imp]]
if (ncol(importance) < 2) { # Plot dotplot if dim is ncol < 2
importance %>%
as.matrix() %>%
dotplot(main = model_title)
} else { # Plot heatmap if ncol > 2
importance %>%
as.matrix() %>%
levelplot(xlab = NULL, ylab = NULL, main = model_title, scales = list(x = list(rot = 45)))
}
}
# Tuning parameters -------------------------------------------------------
# Set algorithms I wish to fit
# Rather than using methodList as provided above, I've switched to tuneList because I need to control tuning parameters of random forest algorithm.
my_algorithms <- list(
glmnet = caretModelSpec(method = "glmnet"),
rpart = caretModelSpec(method = "rpart"),
svmRadial = caretModelSpec(method = "svmRadial"),
rf = caretModelSpec(method = "rf", importance = TRUE), # Importance is not computed for "rf" by default
nnet = caretModelSpec(method = "nnet"),
knn = caretModelSpec(method = "knn")
)
# Define controls
my_controls <- trainControl(
method = "cv",
savePredictions = "final",
number = 3
)
# Run the models all at once with caretEnsemble
my_list_of_models <- caretList(Species ~ .,
data = iris,
tuneList = my_algorithms,
trControl = my_controls
)
# Extract variable importance ---------------------------------------------
importance <- lapply(my_list_of_models, varImp)
# Plotting variable immportance -------------------------------------------
# Create second loop to go over extracted importance and plot it using plot()
importance_plots <- list()
for (imp in seq_along(importance)) {
# importance_plots[[imp]] <- plot(importance[[imp]])
importance_plots[[imp]] <- plot_importance(importance_list = importance, imp = imp, algo_names = names(my_list_of_models))
}
# Multiple plots at once
do.call("grid.arrange", c(importance_plots))
我在尝试从模型集合中绘制变量重要性时一直 运行 出错。
我有我已经拟合的模型集合,现在我正在尝试为我已经拟合的每个算法创建多个变量重要性图。我正在使用插入符号中的 varImp()
函数来提取变量重要性,然后 plot()
它。为了适合模型的集合,我正在使用 caretEnsemble
包。
感谢您的帮助,请参阅下面的代码示例。
# Caret ensemble is needed to produce list of models
library(caret)
library(caretEnsemble)
# Set algorithms I wish to fit
my_algorithms <- c("glmnet", "svmRadial", "rf", "nnet", "knn", "rpart")
# Define controls
my_controls <- trainControl(
method = "cv",
savePredictions = "final",
number = 3
)
# Run the models all at once with caretEnsemble
my_list_of_models <- caretEnsemble::caretList(Species ~ .,
data = iris,
trControl = my_controls,
methodList = my_algorithms)
# Subset models
list_of_algorithms <- my_list_of_models[my_algorithms]
# Create first for loop to extract variable importance via caret::varImp()
importance <- list()
for (algo in seq_along(list_of_algorithms)) {
importance[[algo]] <- varImp(list_of_algorithms[[algo]])
}
# Create second loop to go over extracted importance and plot it using plot()
importance_plots <- list()
for (imp in seq_along(importance)) {
importance_plots[[imp]] <- plot(importance[[imp]])
}
# Error occurs during the second for loop:
Error in data.frame(values = unlist(unname(x)), ind, stringsAsFactors = FALSE):arguments imply differing number of rows: 16,
我想出了解决上述问题的方法,并决定post将其作为我自己的答案。我编写了一个小函数来绘制变量重要性,而不依赖 caret
辅助函数来创建绘图。我使用 dotplot
和 levelplot
因为 caret
returns data.frame
根据提供的算法不同。它可能不适用于不适合的不同算法和模型。
# Libraries ---------------------------------------------------------------
library(caret) # To train ML algorithms
library(dplyr) # Required for %>% operators in custom function below
library(caretEnsemble) # To train multiple caret models
library(lattice) # Required for plotting, should be loaded alongside caret
library(gridExtra) # Required for plotting multiple plots
# Custom function ---------------------------------------------------------
# The function requires list of models as input and is used in for loop
plot_importance <- function(importance_list, imp, algo_names) {
importance <- importance_list[[imp]]$importance
model_title <- algo_names[[imp]]
if (ncol(importance) < 2) { # Plot dotplot if dim is ncol < 2
importance %>%
as.matrix() %>%
dotplot(main = model_title)
} else { # Plot heatmap if ncol > 2
importance %>%
as.matrix() %>%
levelplot(xlab = NULL, ylab = NULL, main = model_title, scales = list(x = list(rot = 45)))
}
}
# Tuning parameters -------------------------------------------------------
# Set algorithms I wish to fit
# Rather than using methodList as provided above, I've switched to tuneList because I need to control tuning parameters of random forest algorithm.
my_algorithms <- list(
glmnet = caretModelSpec(method = "glmnet"),
rpart = caretModelSpec(method = "rpart"),
svmRadial = caretModelSpec(method = "svmRadial"),
rf = caretModelSpec(method = "rf", importance = TRUE), # Importance is not computed for "rf" by default
nnet = caretModelSpec(method = "nnet"),
knn = caretModelSpec(method = "knn")
)
# Define controls
my_controls <- trainControl(
method = "cv",
savePredictions = "final",
number = 3
)
# Run the models all at once with caretEnsemble
my_list_of_models <- caretList(Species ~ .,
data = iris,
tuneList = my_algorithms,
trControl = my_controls
)
# Extract variable importance ---------------------------------------------
importance <- lapply(my_list_of_models, varImp)
# Plotting variable immportance -------------------------------------------
# Create second loop to go over extracted importance and plot it using plot()
importance_plots <- list()
for (imp in seq_along(importance)) {
# importance_plots[[imp]] <- plot(importance[[imp]])
importance_plots[[imp]] <- plot_importance(importance_list = importance, imp = imp, algo_names = names(my_list_of_models))
}
# Multiple plots at once
do.call("grid.arrange", c(importance_plots))