scatterplot3d:带残差的回归平面
scatterplot3d: regression plane with residuals
在 R 中使用 scatterplot3d
,我试图从观察到回归平面绘制红线:
wh <- iris$Species != "setosa"
x <- iris$Sepal.Width[wh]
y <- iris$Sepal.Length[wh]
z <- iris$Petal.Width[wh]
df <- data.frame(x, y, z)
LM <- lm(y ~ x + z, df)
library(scatterplot3d)
G <- scatterplot3d(x, z, y, highlight.3d = FALSE, type = "p")
G$plane3d(LM, draw_polygon = TRUE, draw_lines = FALSE)
获取下图的3D等价物:
在 2D 中,我可以只使用 segments
:
pred <- predict(model)
segments(x, y, x, pred, col = 2)
但是在 3D 中我对坐标感到困惑。
使用来自 An Introduction to Statistical Learning 的广告数据集,您可以
advertising_fit1 <- lm(sales~TV+radio, data = advertising)
sp <- scatterplot3d::scatterplot3d(advertising$TV,
advertising$radio,
advertising$sales,
angle = 45)
sp$plane3d(advertising_fit1, lty.box = "solid")#,
# polygon_args = list(col = rgb(.1, .2, .7, .5)) # Fill color
orig <- sp$xyz.convert(advertising$TV,
advertising$radio,
advertising$sales)
plane <- sp$xyz.convert(advertising$TV,
advertising$radio, fitted(advertising_fit1))
i.negpos <- 1 + (resid(advertising_fit1) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
col = c("blue", "red")[i.negpos],
lty = 1) # (2:1)[i.negpos]
sp <- FactoClass::addgrids3d(advertising$TV,
advertising$radio,
advertising$sales,
angle = 45,
grid = c("xy", "xz", "yz"))
另一个交互式版本使用rgl
包
rgl::plot3d(advertising$TV,
advertising$radio,
advertising$sales, type = "p",
xlab = "TV",
ylab = "radio",
zlab = "Sales", site = 5, lwd = 15)
rgl::planes3d(advertising_fit1$coefficients["TV"],
advertising_fit1$coefficients["radio"], -1,
advertising_fit1$coefficients["(Intercept)"], alpha = 0.3, front = "line")
rgl::segments3d(rep(advertising$TV, each = 2),
rep(advertising$radio, each = 2),
matrix(t(cbind(advertising$sales, predict(advertising_fit1))), nc = 1),
col = c("blue", "red")[i.negpos],
lty = 1) # (2:1)[i.negpos]
rgl::rgl.postscript("./pics/plot-advertising-rgl.pdf","pdf") # does not really work...
我决定也包括我自己的实现,以防其他人想要使用它。
回归平面
require("scatterplot3d")
# Data, linear regression with two explanatory variables
wh <- iris$Species != "setosa"
x <- iris$Sepal.Width[wh]
y <- iris$Sepal.Length[wh]
z <- iris$Petal.Width[wh]
df <- data.frame(x, y, z)
LM <- lm(y ~ x + z, df)
# scatterplot
s3d <- scatterplot3d(x, z, y, pch = 19, type = "p", color = "darkgrey",
main = "Regression Plane", grid = TRUE, box = FALSE,
mar = c(2.5, 2.5, 2, 1.5), angle = 55)
# regression plane
s3d$plane3d(LM, draw_polygon = TRUE, draw_lines = TRUE,
polygon_args = list(col = rgb(.1, .2, .7, .5)))
# overlay positive residuals
wh <- resid(LM) > 0
s3d$points3d(x[wh], z[wh], y[wh], pch = 19)
残差
# scatterplot
s3d <- scatterplot3d(x, z, y, pch = 19, type = "p", color = "darkgrey",
main = "Regression Plane", grid = TRUE, box = FALSE,
mar = c(2.5, 2.5, 2, 1.5), angle = 55)
# compute locations of segments
orig <- s3d$xyz.convert(x, z, y)
plane <- s3d$xyz.convert(x, z, fitted(LM))
i.negpos <- 1 + (resid(LM) > 0) # which residuals are above the plane?
# draw residual distances to regression plane
segments(orig$x, orig$y, plane$x, plane$y, col = "red", lty = c(2, 1)[i.negpos],
lwd = 1.5)
# draw the regression plane
s3d$plane3d(LM, draw_polygon = TRUE, draw_lines = TRUE,
polygon_args = list(col = rgb(0.8, 0.8, 0.8, 0.8)))
# redraw positive residuals and segments above the plane
wh <- resid(LM) > 0
segments(orig$x[wh], orig$y[wh], plane$x[wh], plane$y[wh], col = "red", lty = 1, lwd = 1.5)
s3d$points3d(x[wh], z[wh], y[wh], pch = 19)
最终结果:
虽然我真的很欣赏 scatterplot3d
函数的便利性,但最终我还是 copying the entire function from github,因为基数 plot
中的几个参数要么被强制,要么不被强制正确传递给 scatterplot3d
(例如,使用 las
进行轴旋转,使用 cex
、cex.main
进行字符扩展等)。我不确定这么长而且乱七八糟的代码块在这里是否合适,所以我在上面包含了 MWE。
无论如何,这就是我最终包含在我的书中的内容:
(是的,那其实只是鸢尾花数据集,不要告诉任何人。)
在 R 中使用 scatterplot3d
,我试图从观察到回归平面绘制红线:
wh <- iris$Species != "setosa"
x <- iris$Sepal.Width[wh]
y <- iris$Sepal.Length[wh]
z <- iris$Petal.Width[wh]
df <- data.frame(x, y, z)
LM <- lm(y ~ x + z, df)
library(scatterplot3d)
G <- scatterplot3d(x, z, y, highlight.3d = FALSE, type = "p")
G$plane3d(LM, draw_polygon = TRUE, draw_lines = FALSE)
获取下图的3D等价物:
在 2D 中,我可以只使用 segments
:
pred <- predict(model)
segments(x, y, x, pred, col = 2)
但是在 3D 中我对坐标感到困惑。
使用来自 An Introduction to Statistical Learning 的广告数据集,您可以
advertising_fit1 <- lm(sales~TV+radio, data = advertising)
sp <- scatterplot3d::scatterplot3d(advertising$TV,
advertising$radio,
advertising$sales,
angle = 45)
sp$plane3d(advertising_fit1, lty.box = "solid")#,
# polygon_args = list(col = rgb(.1, .2, .7, .5)) # Fill color
orig <- sp$xyz.convert(advertising$TV,
advertising$radio,
advertising$sales)
plane <- sp$xyz.convert(advertising$TV,
advertising$radio, fitted(advertising_fit1))
i.negpos <- 1 + (resid(advertising_fit1) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
col = c("blue", "red")[i.negpos],
lty = 1) # (2:1)[i.negpos]
sp <- FactoClass::addgrids3d(advertising$TV,
advertising$radio,
advertising$sales,
angle = 45,
grid = c("xy", "xz", "yz"))
另一个交互式版本使用rgl
包
rgl::plot3d(advertising$TV,
advertising$radio,
advertising$sales, type = "p",
xlab = "TV",
ylab = "radio",
zlab = "Sales", site = 5, lwd = 15)
rgl::planes3d(advertising_fit1$coefficients["TV"],
advertising_fit1$coefficients["radio"], -1,
advertising_fit1$coefficients["(Intercept)"], alpha = 0.3, front = "line")
rgl::segments3d(rep(advertising$TV, each = 2),
rep(advertising$radio, each = 2),
matrix(t(cbind(advertising$sales, predict(advertising_fit1))), nc = 1),
col = c("blue", "red")[i.negpos],
lty = 1) # (2:1)[i.negpos]
rgl::rgl.postscript("./pics/plot-advertising-rgl.pdf","pdf") # does not really work...
我决定也包括我自己的实现,以防其他人想要使用它。
回归平面
require("scatterplot3d")
# Data, linear regression with two explanatory variables
wh <- iris$Species != "setosa"
x <- iris$Sepal.Width[wh]
y <- iris$Sepal.Length[wh]
z <- iris$Petal.Width[wh]
df <- data.frame(x, y, z)
LM <- lm(y ~ x + z, df)
# scatterplot
s3d <- scatterplot3d(x, z, y, pch = 19, type = "p", color = "darkgrey",
main = "Regression Plane", grid = TRUE, box = FALSE,
mar = c(2.5, 2.5, 2, 1.5), angle = 55)
# regression plane
s3d$plane3d(LM, draw_polygon = TRUE, draw_lines = TRUE,
polygon_args = list(col = rgb(.1, .2, .7, .5)))
# overlay positive residuals
wh <- resid(LM) > 0
s3d$points3d(x[wh], z[wh], y[wh], pch = 19)
残差
# scatterplot
s3d <- scatterplot3d(x, z, y, pch = 19, type = "p", color = "darkgrey",
main = "Regression Plane", grid = TRUE, box = FALSE,
mar = c(2.5, 2.5, 2, 1.5), angle = 55)
# compute locations of segments
orig <- s3d$xyz.convert(x, z, y)
plane <- s3d$xyz.convert(x, z, fitted(LM))
i.negpos <- 1 + (resid(LM) > 0) # which residuals are above the plane?
# draw residual distances to regression plane
segments(orig$x, orig$y, plane$x, plane$y, col = "red", lty = c(2, 1)[i.negpos],
lwd = 1.5)
# draw the regression plane
s3d$plane3d(LM, draw_polygon = TRUE, draw_lines = TRUE,
polygon_args = list(col = rgb(0.8, 0.8, 0.8, 0.8)))
# redraw positive residuals and segments above the plane
wh <- resid(LM) > 0
segments(orig$x[wh], orig$y[wh], plane$x[wh], plane$y[wh], col = "red", lty = 1, lwd = 1.5)
s3d$points3d(x[wh], z[wh], y[wh], pch = 19)
最终结果:
虽然我真的很欣赏 scatterplot3d
函数的便利性,但最终我还是 copying the entire function from github,因为基数 plot
中的几个参数要么被强制,要么不被强制正确传递给 scatterplot3d
(例如,使用 las
进行轴旋转,使用 cex
、cex.main
进行字符扩展等)。我不确定这么长而且乱七八糟的代码块在这里是否合适,所以我在上面包含了 MWE。
无论如何,这就是我最终包含在我的书中的内容:
(是的,那其实只是鸢尾花数据集,不要告诉任何人。)