Plotly:在箱线图中用样本名称注释异常值
Plotly: Annotate outliers with sample names in boxplot
我正在尝试使用 ggplot 创建一个箱线图并绘制数据集 airquality
,其中 Month
在 x 轴上,Ozone
值在 y 轴上。我的目标是注释情节,以便当我将鼠标悬停在离群点上时,除了臭氧值外,它还应该显示 Sample
名称:
library(tidyverse)
library(plotly)
library(datasets)
data(airquality)
# add months
airquality$Month <- factor(airquality$Month,
labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names
airquality$Sample <- paste0('Sample_',seq(1:nrow(airquality)))
# boxplot
p <- ggplot(airquality, aes(x = Month, y = Ozone)) +
geom_boxplot()
p <- plotly_build(p)
p
这是创建的情节:
默认情况下,当我将鼠标悬停在每个框上时,它会显示 x 轴变量的基本摘要统计信息。但是,我还想看看异常样本是什么。例如将鼠标悬停在 May 上时,它显示异常值 115
但不显示它实际上是 Sample_30
。
如何将 Sample 变量添加到离群点,使其同时显示离群值和样本名称?
我在 https://github.com/ropensci/plotly/issues/887
上找到了解决方案
尝试制作这种代码!
library(plotly)
vals <- boxplot(airquality$Ozone,plot = FALSE)
y <- airquality[airquality$Ozone > vals$stats[5,1] | airquality$Ozone < vals$stats[1,1],]
plot_ly(airquality,y = ~Ozone,x = ~Month,type = "box") %>%
add_markers(data = y, text = y$Day)
我们可以几乎这样得到:
library(ggplot2)
library(plotly)
library(datasets)
data(airquality)
# add months
airquality$Month <- factor(airquality$Month,
labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names
airquality$Sample <- paste0('Sample_',seq(1:nrow(airquality)))
# boxplot
gg <- ggplot(airquality, aes(x = Month, y = Ozone)) +
geom_boxplot()
ggly <- ggplotly(gg)
# add hover info
hoverinfo <- with(airquality, paste0("sample: ", Sample, "</br></br>",
"month: ", Month, "</br>",
"ozone: ", Ozone))
ggly$x$data[[1]]$text <- hoverinfo
ggly$x$data[[1]]$hoverinfo <- c("text", "boxes")
ggly
不幸的是,悬停不适用于第一个箱线图...
我已经通过 Shiny 实现了这一目标。
library(plotly)
library(shiny)
library(htmlwidgets)
library(datasets)
# Prepare data ----
data(airquality)
# add months
airquality$Month <- factor(airquality$Month,
labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names
airquality$Sample <- paste0('Sample_', seq(1:nrow(airquality)))
# Plotly on hover event ----
addHoverBehavior <- c(
"function(el, x){",
" el.on('plotly_hover', function(data) {",
" if(data.points.length==1){",
" $('.hovertext').hide();",
" Shiny.setInputValue('hovering', true);",
" var d = data.points[0];",
" Shiny.setInputValue('left_px', d.xaxis.d2p(d.x) + d.xaxis._offset);",
" Shiny.setInputValue('top_px', d.yaxis.l2p(d.y) + d.yaxis._offset);",
" Shiny.setInputValue('dx', d.x);",
" Shiny.setInputValue('dy', d.y);",
" Shiny.setInputValue('dtext', d.text);",
" }",
" });",
" el.on('plotly_unhover', function(data) {",
" Shiny.setInputValue('hovering', false);",
" });",
"}")
# Shiny app ----
ui <- fluidPage(
tags$head(
# style for the tooltip with an arrow (http://www.cssarrowplease.com/)
tags$style("
.arrow_box {
position: absolute;
pointer-events: none;
z-index: 100;
white-space: nowrap;
background: rgb(54,57,64);
color: white;
font-size: 14px;
border: 1px solid;
border-color: rgb(54,57,64);
border-radius: 1px;
}
.arrow_box:after, .arrow_box:before {
right: 100%;
top: 50%;
border: solid transparent;
content: ' ';
height: 0;
width: 0;
position: absolute;
pointer-events: none;
}
.arrow_box:after {
border-color: rgba(136, 183, 213, 0);
border-right-color: rgb(54,57,64);
border-width: 4px;
margin-top: -4px;
}
.arrow_box:before {
border-color: rgba(194, 225, 245, 0);
border-right-color: rgb(54,57,64);
border-width: 10px;
margin-top: -10px;
}")
),
div(
style = "position:relative",
plotlyOutput("myplot"),
uiOutput("hover_info")
)
)
server <- function(input, output){
output$myplot <- renderPlotly({
airquality[[".id"]] <- seq_len(nrow(airquality))
gg <- ggplot(airquality, aes(x=Month, y=Ozone, ids=.id)) + geom_boxplot()
ggly <- ggplotly(gg, tooltip = "y")
ids <- ggly$x$data[[1]]$ids
ggly$x$data[[1]]$text <-
with(airquality, paste0("<b> sample: </b>", Sample, "<br/>",
"<b> month: </b>", Month, "<br/>",
"<b> ozone: </b>", Ozone))[ids]
ggly %>% onRender(addHoverBehavior)
})
output$hover_info <- renderUI({
if(isTRUE(input[["hovering"]])){
style <- paste0("left: ", input[["left_px"]] + 4 + 5, "px;", # 4 = border-width after
"top: ", input[["top_px"]] - 24 - 2 - 1, "px;") # 24 = line-height/2 * number of lines; 2 = padding; 1 = border thickness
div(
class = "arrow_box", style = style,
p(HTML(input$dtext),
style="margin: 0; padding: 2px; line-height: 16px;")
)
}
})
}
shinyApp(ui = ui, server = server)
此方法将实现相同的结果,但不显示箱线图汇总统计悬停。删除离群值并悬停在箱线图层上,并覆盖仅包含悬停信息的离群值的 geom_point 层。 plotly 异常值的定义在 here 中说明。在处理更复杂的图形(例如并排分组的箱线图)时,此方法比其他解决方案更有效。有趣的是,此数据的 ggplotly boxplot 图与 ggplot 图不同。 ggplotly 中 Aug 的上围栏胡须比 8 月的 ggplot 上围栏胡须延伸得更远。
library(dplyr)
library(plotly)
library(datasets)
library(ggplot2)
data(airquality)
# manipulate data
mydata = airquality %>%
# add months
mutate(Month = factor(airquality$Month,labels = c("May", "Jun", "Jul", "Aug", "Sep")),
# add sample names
Sample = paste0('Sample_',seq(1:n())))%>%
# label if outlier sample by Month
group_by(Month) %>%
mutate(OutlierFlag = ifelse((Ozone<quantile(Ozone,1/3,na.rm=T)-1.5*IQR(Ozone,na.rm=T)) | (Ozone>quantile(Ozone,2/3,na.rm=T)+1.5*IQR(Ozone,na.rm=T)),'Outlier','NotOutlier'))%>%
group_by()
# boxplot
p <- ggplot(mydata, aes(x = Month, y = Ozone)) +
geom_boxplot()+
geom_point(data=mydata %>% filter(OutlierFlag=="Outlier"),aes(group=Month,label1=Sample,label2=Ozone),size=2)
output = ggplotly(p, tooltip=c("label1","label2"))
# makes boxplot outliers invisible and hover info off
for (i in 1:length(output$x$data)){
if (output$x$data[[i]]$type=="box"){
output$x$data[[i]]$marker$opacity = 0
output$x$data[[i]]$hoverinfo = "none"
}
}
# print end result of plotly graph
output
我正在尝试使用 ggplot 创建一个箱线图并绘制数据集 airquality
,其中 Month
在 x 轴上,Ozone
值在 y 轴上。我的目标是注释情节,以便当我将鼠标悬停在离群点上时,除了臭氧值外,它还应该显示 Sample
名称:
library(tidyverse)
library(plotly)
library(datasets)
data(airquality)
# add months
airquality$Month <- factor(airquality$Month,
labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names
airquality$Sample <- paste0('Sample_',seq(1:nrow(airquality)))
# boxplot
p <- ggplot(airquality, aes(x = Month, y = Ozone)) +
geom_boxplot()
p <- plotly_build(p)
p
这是创建的情节:
默认情况下,当我将鼠标悬停在每个框上时,它会显示 x 轴变量的基本摘要统计信息。但是,我还想看看异常样本是什么。例如将鼠标悬停在 May 上时,它显示异常值 115
但不显示它实际上是 Sample_30
。
如何将 Sample 变量添加到离群点,使其同时显示离群值和样本名称?
我在 https://github.com/ropensci/plotly/issues/887
上找到了解决方案尝试制作这种代码!
library(plotly)
vals <- boxplot(airquality$Ozone,plot = FALSE)
y <- airquality[airquality$Ozone > vals$stats[5,1] | airquality$Ozone < vals$stats[1,1],]
plot_ly(airquality,y = ~Ozone,x = ~Month,type = "box") %>%
add_markers(data = y, text = y$Day)
我们可以几乎这样得到:
library(ggplot2)
library(plotly)
library(datasets)
data(airquality)
# add months
airquality$Month <- factor(airquality$Month,
labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names
airquality$Sample <- paste0('Sample_',seq(1:nrow(airquality)))
# boxplot
gg <- ggplot(airquality, aes(x = Month, y = Ozone)) +
geom_boxplot()
ggly <- ggplotly(gg)
# add hover info
hoverinfo <- with(airquality, paste0("sample: ", Sample, "</br></br>",
"month: ", Month, "</br>",
"ozone: ", Ozone))
ggly$x$data[[1]]$text <- hoverinfo
ggly$x$data[[1]]$hoverinfo <- c("text", "boxes")
ggly
不幸的是,悬停不适用于第一个箱线图...
我已经通过 Shiny 实现了这一目标。
library(plotly)
library(shiny)
library(htmlwidgets)
library(datasets)
# Prepare data ----
data(airquality)
# add months
airquality$Month <- factor(airquality$Month,
labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names
airquality$Sample <- paste0('Sample_', seq(1:nrow(airquality)))
# Plotly on hover event ----
addHoverBehavior <- c(
"function(el, x){",
" el.on('plotly_hover', function(data) {",
" if(data.points.length==1){",
" $('.hovertext').hide();",
" Shiny.setInputValue('hovering', true);",
" var d = data.points[0];",
" Shiny.setInputValue('left_px', d.xaxis.d2p(d.x) + d.xaxis._offset);",
" Shiny.setInputValue('top_px', d.yaxis.l2p(d.y) + d.yaxis._offset);",
" Shiny.setInputValue('dx', d.x);",
" Shiny.setInputValue('dy', d.y);",
" Shiny.setInputValue('dtext', d.text);",
" }",
" });",
" el.on('plotly_unhover', function(data) {",
" Shiny.setInputValue('hovering', false);",
" });",
"}")
# Shiny app ----
ui <- fluidPage(
tags$head(
# style for the tooltip with an arrow (http://www.cssarrowplease.com/)
tags$style("
.arrow_box {
position: absolute;
pointer-events: none;
z-index: 100;
white-space: nowrap;
background: rgb(54,57,64);
color: white;
font-size: 14px;
border: 1px solid;
border-color: rgb(54,57,64);
border-radius: 1px;
}
.arrow_box:after, .arrow_box:before {
right: 100%;
top: 50%;
border: solid transparent;
content: ' ';
height: 0;
width: 0;
position: absolute;
pointer-events: none;
}
.arrow_box:after {
border-color: rgba(136, 183, 213, 0);
border-right-color: rgb(54,57,64);
border-width: 4px;
margin-top: -4px;
}
.arrow_box:before {
border-color: rgba(194, 225, 245, 0);
border-right-color: rgb(54,57,64);
border-width: 10px;
margin-top: -10px;
}")
),
div(
style = "position:relative",
plotlyOutput("myplot"),
uiOutput("hover_info")
)
)
server <- function(input, output){
output$myplot <- renderPlotly({
airquality[[".id"]] <- seq_len(nrow(airquality))
gg <- ggplot(airquality, aes(x=Month, y=Ozone, ids=.id)) + geom_boxplot()
ggly <- ggplotly(gg, tooltip = "y")
ids <- ggly$x$data[[1]]$ids
ggly$x$data[[1]]$text <-
with(airquality, paste0("<b> sample: </b>", Sample, "<br/>",
"<b> month: </b>", Month, "<br/>",
"<b> ozone: </b>", Ozone))[ids]
ggly %>% onRender(addHoverBehavior)
})
output$hover_info <- renderUI({
if(isTRUE(input[["hovering"]])){
style <- paste0("left: ", input[["left_px"]] + 4 + 5, "px;", # 4 = border-width after
"top: ", input[["top_px"]] - 24 - 2 - 1, "px;") # 24 = line-height/2 * number of lines; 2 = padding; 1 = border thickness
div(
class = "arrow_box", style = style,
p(HTML(input$dtext),
style="margin: 0; padding: 2px; line-height: 16px;")
)
}
})
}
shinyApp(ui = ui, server = server)
此方法将实现相同的结果,但不显示箱线图汇总统计悬停。删除离群值并悬停在箱线图层上,并覆盖仅包含悬停信息的离群值的 geom_point 层。 plotly 异常值的定义在 here 中说明。在处理更复杂的图形(例如并排分组的箱线图)时,此方法比其他解决方案更有效。有趣的是,此数据的 ggplotly boxplot 图与 ggplot 图不同。 ggplotly 中 Aug 的上围栏胡须比 8 月的 ggplot 上围栏胡须延伸得更远。
library(dplyr)
library(plotly)
library(datasets)
library(ggplot2)
data(airquality)
# manipulate data
mydata = airquality %>%
# add months
mutate(Month = factor(airquality$Month,labels = c("May", "Jun", "Jul", "Aug", "Sep")),
# add sample names
Sample = paste0('Sample_',seq(1:n())))%>%
# label if outlier sample by Month
group_by(Month) %>%
mutate(OutlierFlag = ifelse((Ozone<quantile(Ozone,1/3,na.rm=T)-1.5*IQR(Ozone,na.rm=T)) | (Ozone>quantile(Ozone,2/3,na.rm=T)+1.5*IQR(Ozone,na.rm=T)),'Outlier','NotOutlier'))%>%
group_by()
# boxplot
p <- ggplot(mydata, aes(x = Month, y = Ozone)) +
geom_boxplot()+
geom_point(data=mydata %>% filter(OutlierFlag=="Outlier"),aes(group=Month,label1=Sample,label2=Ozone),size=2)
output = ggplotly(p, tooltip=c("label1","label2"))
# makes boxplot outliers invisible and hover info off
for (i in 1:length(output$x$data)){
if (output$x$data[[i]]$type=="box"){
output$x$data[[i]]$marker$opacity = 0
output$x$data[[i]]$hoverinfo = "none"
}
}
# print end result of plotly graph
output