如何使用 R 中的 pdp 计算 3d 部分依赖图?
How to use the pdp in R to compute 3d partial dependence plots?
我在 R 中有一个与此类似的随机森林模型:
# Attach the modelling, data and partial-dependence packages.
library(randomForest)
library(caret)
library(pdp)

# Load the `cars` data and keep its first five columns.
data(cars)
my_data <- cars[, 1:5]

# Random forest with Price as the response and every other column as predictor.
my_rf <- randomForest(Price ~ ., data = my_data)

# Two-variable partial dependence.
# NOTE: "Price" is the response of `my_rf`, not one of its predictors.
price_mil <- partial(my_rf, pred.var = c("Price", "Mileage"))

# Plot the result with the level plot turned off and a colour key shown.
plotPartial(price_mil, zlab = "Price", levelplot = FALSE, colorkey = TRUE)
但是,我想要一些 3d 部分依赖图,包括轴上的参数值。我如何使用 pdp
执行此操作?
首先,在您的示例中,您在 partial()
函数的 pred.var 中使用了响应变量 "Price"。这对我来说没有意义,因为这样您实际上只是绘制了一个二维的部分依赖图。我在下面的示例代码中对此做了修改。
不过,要得到您想要的部分依赖图,可以使用
# 3D partial dependence plot with numeric tick labels (instead of arrows) on
# the axes; `scales` is forwarded to the underlying lattice plot.
plotPartial(price_mil, levelplot = FALSE, zlab = "Price",
            scales = list(arrows = FALSE))
如果您想要更多的控制,我建议直接使用该包底层调用的函数:先构造公式和用于绘图的数据对象,然后调用 wireframe()
并传入 scale = list(arrows = F)(该参数在 lattice 中的完整名称为 scales),
即可在坐标轴上显示数值。
library("randomForest")
library("caret")
library("pdp")

# Load the car price data explicitly from caret so it cannot be confused with
# the unrelated `cars` data set in the datasets package.
data("cars", package = "caret")
my_data <- cars[1:5]

# Random forest with Price as the response and the remaining columns as
# predictors.
my_rf <- randomForest(Price ~ ., data = my_data)

# Partial dependence of the predicted Price on two predictors.
object <- pdp::partial(my_rf, pred.var = c("Cylinder", "Mileage"))

# Build the plotting formula `yhat ~ <var1>*<var2>` from the first two column
# names of the partial dependence result.
form <- stats::as.formula(
  paste("yhat ~", paste(names(object)[1L:2L], collapse = "*"))
)

# Draped 3D wireframe; `scales = list(arrows = FALSE)` puts tick values on the
# axes instead of direction arrows.
lattice::wireframe(form, data = object, drape = TRUE, zlab = "Price",
                   scales = list(arrows = FALSE))
输出结果如下:
带有 plotly 的交互式 3D 部分依赖图
# Random seed to reproduce the results
set.seed(1)

# Create artificial data for a binary classification problem
y <- factor(sample(c(0, 1), size = 100, replace = TRUE), levels = c("0", "1"))
d <- data.frame(y = y, x1 = rnorm(100), x2 = rnorm(100), x3 = rnorm(100))

# Build a random forest model with 100 trees.
# Note: the tree-count argument of randomForest() is `ntree`; it has no
# `n.trees` argument.
library(randomForest)
rf1 <- randomForest::randomForest(y ~ ., data = d, ntree = 100, mtry = 2)

###### Bivariate partial dependency plots ######
# Step 1: compute the partial dependence values
# given two variables using the pdp library.
# Called directly (no pipe) so this step does not depend on `%>%` being
# attached before plotly is loaded.
library(pdp)
pd <- partial(rf1, pred.var = c("x1", "x2"))

# Step 2: construct the plot using the plotly library
library(plotly)
p <- plot_ly(x = pd$x1, y = pd$x2, z = pd$yhat, type = "mesh3d")

# Step 3: add labels to the plot
p <- p %>% layout(scene = list(xaxis = list(title = "x1"),
                               yaxis = list(title = "x2"),
                               zaxis = list(title = "Partial Dependence")))

# Step 4: show the plot
show(p)
使用 plotly 绘制的交互式等高线图(即平面化的双变量 PDP),
以色标表示部分依赖值
###### Bivariate PDPs with colored scale ######
# Interpolate the partial dependence values onto a regular grid
dens <- akima::interp(x = pd$x1, y = pd$x2, z = pd$yhat)

# Flattened contour partial dependence plot for 2 variables.
# akima::interp() returns z indexed as [x, y], while plotly reads the rows of a
# z matrix along the y axis, so the matrix is transposed to keep x1 on the
# x axis and x2 on the y axis.
p2 <- plot_ly(x = dens$x,
              y = dens$y,
              z = t(dens$z),
              colors = c("blue", "grey", "red"),
              type = "contour")

# Add axis labels for 2D plots
p2 <- p2 %>% layout(xaxis = list(title = "x1"), yaxis = list(title = "x2"))

# Show the plot
show(p2)
使用 plotly
的交互式 3D 部分依赖图,带有部分依赖值的色标
###### Interactive 3D partial dependence plot with coloring scale ######
# Interpolate the partial dependence values onto a regular grid
dens <- akima::interp(x = pd$x1, y = pd$x2, z = pd$yhat)

# 3D partial dependence plot with a coloring scale.
# akima::interp() returns z indexed as [x, y], while plotly reads the rows of a
# z matrix along the y axis, so the matrix is transposed to keep x1 on the
# x axis and x2 on the y axis.
p3 <- plot_ly(x = dens$x,
              y = dens$y,
              z = t(dens$z),
              colors = c("blue", "grey", "red"),
              type = "surface")

# Add axis labels for 3D plots
p3 <- p3 %>% layout(scene = list(xaxis = list(title = "x1"),
                                 yaxis = list(title = "x2"),
                                 zaxis = list(title = "Partial Dependence")))

# Show the plot
show(p3)
我在 R 中有一个与此类似的随机森林模型:
# Attach the modelling, data and partial-dependence packages.
library(randomForest)
library(caret)
library(pdp)

# Load the `cars` data and keep its first five columns.
data(cars)
my_data <- cars[, 1:5]

# Random forest with Price as the response and every other column as predictor.
my_rf <- randomForest(Price ~ ., data = my_data)

# Two-variable partial dependence.
# NOTE: "Price" is the response of `my_rf`, not one of its predictors.
price_mil <- partial(my_rf, pred.var = c("Price", "Mileage"))

# Plot the result with the level plot turned off and a colour key shown.
plotPartial(price_mil, zlab = "Price", levelplot = FALSE, colorkey = TRUE)
但是,我想要一些 3d 部分依赖图,包括轴上的参数值。我如何使用 pdp
执行此操作?
首先,在您的示例中,您在 partial()
函数的 pred.var 中使用了响应变量 "Price"。这对我来说没有意义,因为这样您实际上只是绘制了一个二维的部分依赖图。我在下面的示例代码中对此做了修改。
不过,要得到您想要的部分依赖图,可以使用
# 3D partial dependence plot with numeric tick labels (instead of arrows) on
# the axes; `scales` is forwarded to the underlying lattice plot.
plotPartial(price_mil, levelplot = FALSE, zlab = "Price",
            scales = list(arrows = FALSE))
如果您想要更多的控制,我建议直接使用该包底层调用的函数:先构造公式和用于绘图的数据对象,然后调用 wireframe()
并传入 scale = list(arrows = F)(该参数在 lattice 中的完整名称为 scales),
即可在坐标轴上显示数值。
library("randomForest")
library("caret")
library("pdp")

# Load the car price data explicitly from caret so it cannot be confused with
# the unrelated `cars` data set in the datasets package.
data("cars", package = "caret")
my_data <- cars[1:5]

# Random forest with Price as the response and the remaining columns as
# predictors.
my_rf <- randomForest(Price ~ ., data = my_data)

# Partial dependence of the predicted Price on two predictors.
object <- pdp::partial(my_rf, pred.var = c("Cylinder", "Mileage"))

# Build the plotting formula `yhat ~ <var1>*<var2>` from the first two column
# names of the partial dependence result.
form <- stats::as.formula(
  paste("yhat ~", paste(names(object)[1L:2L], collapse = "*"))
)

# Draped 3D wireframe; `scales = list(arrows = FALSE)` puts tick values on the
# axes instead of direction arrows.
lattice::wireframe(form, data = object, drape = TRUE, zlab = "Price",
                   scales = list(arrows = FALSE))
输出结果如下:
带有 plotly 的交互式 3D 部分依赖图
# Random seed to reproduce the results
set.seed(1)

# Create artificial data for a binary classification problem
y <- factor(sample(c(0, 1), size = 100, replace = TRUE), levels = c("0", "1"))
d <- data.frame(y = y, x1 = rnorm(100), x2 = rnorm(100), x3 = rnorm(100))

# Build a random forest model with 100 trees.
# Note: the tree-count argument of randomForest() is `ntree`; it has no
# `n.trees` argument.
library(randomForest)
rf1 <- randomForest::randomForest(y ~ ., data = d, ntree = 100, mtry = 2)

###### Bivariate partial dependency plots ######
# Step 1: compute the partial dependence values
# given two variables using the pdp library.
# Called directly (no pipe) so this step does not depend on `%>%` being
# attached before plotly is loaded.
library(pdp)
pd <- partial(rf1, pred.var = c("x1", "x2"))

# Step 2: construct the plot using the plotly library
library(plotly)
p <- plot_ly(x = pd$x1, y = pd$x2, z = pd$yhat, type = "mesh3d")

# Step 3: add labels to the plot
p <- p %>% layout(scene = list(xaxis = list(title = "x1"),
                               yaxis = list(title = "x2"),
                               zaxis = list(title = "Partial Dependence")))

# Step 4: show the plot
show(p)
使用 plotly 绘制的交互式等高线图(即平面化的双变量 PDP),
以色标表示部分依赖值
###### Bivariate PDPs with colored scale ######
# Interpolate the partial dependence values onto a regular grid
dens <- akima::interp(x = pd$x1, y = pd$x2, z = pd$yhat)

# Flattened contour partial dependence plot for 2 variables.
# akima::interp() returns z indexed as [x, y], while plotly reads the rows of a
# z matrix along the y axis, so the matrix is transposed to keep x1 on the
# x axis and x2 on the y axis.
p2 <- plot_ly(x = dens$x,
              y = dens$y,
              z = t(dens$z),
              colors = c("blue", "grey", "red"),
              type = "contour")

# Add axis labels for 2D plots
p2 <- p2 %>% layout(xaxis = list(title = "x1"), yaxis = list(title = "x2"))

# Show the plot
show(p2)
使用 plotly
的交互式 3D 部分依赖图,带有部分依赖值的色标
###### Interactive 3D partial dependence plot with coloring scale ######
# Interpolate the partial dependence values onto a regular grid
dens <- akima::interp(x = pd$x1, y = pd$x2, z = pd$yhat)

# 3D partial dependence plot with a coloring scale.
# akima::interp() returns z indexed as [x, y], while plotly reads the rows of a
# z matrix along the y axis, so the matrix is transposed to keep x1 on the
# x axis and x2 on the y axis.
p3 <- plot_ly(x = dens$x,
              y = dens$y,
              z = t(dens$z),
              colors = c("blue", "grey", "red"),
              type = "surface")

# Add axis labels for 3D plots
p3 <- p3 %>% layout(scene = list(xaxis = list(title = "x1"),
                                 yaxis = list(title = "x2"),
                                 zaxis = list(title = "Partial Dependence")))

# Show the plot
show(p3)