Plotly:在箱线图中用样本名称注释异常值

Plotly: Annotate outliers with sample names in boxplot

我正在尝试使用 ggplot 创建一个箱线图并绘制数据集 airquality,其中 Month 在 x 轴上,Ozone 值在 y 轴上。我的目标是注释情节,以便当我将鼠标悬停在离群点上时,除了臭氧值外,它还应该显示 Sample 名称:

library(tidyverse)
library(plotly)
library(datasets)
data(airquality)

# add months
airquality$Month <- factor(airquality$Month,
                           labels = c("May", "Jun", "Jul", "Aug", "Sep"))

# add sample names
airquality$Sample <- paste0('Sample_',seq(1:nrow(airquality)))

# boxplot
p <- ggplot(airquality, aes(x = Month, y = Ozone)) +
  geom_boxplot()
p <- plotly_build(p)
p

这是创建的情节:

默认情况下,当我将鼠标悬停在每个框上时,它会显示 x 轴变量的基本摘要统计信息。但是,我还想看看异常样本是什么。例如将鼠标悬停在 May 上时,它显示异常值 115 但不显示它实际上是 Sample_30

如何将 Sample 变量添加到离群点,使其同时显示离群值和样本名称?

我在 https://github.com/ropensci/plotly/issues/887

上找到了解决方案

尝试制作这种代码!

 library(plotly)

 vals <- boxplot(airquality$Ozone,plot = FALSE)
 y <- airquality[airquality$Ozone > vals$stats[5,1] | airquality$Ozone < vals$stats[1,1],]

plot_ly(airquality,y = ~Ozone,x = ~Month,type = "box") %>% 
   add_markers(data = y, text = y$Day)

我们可以几乎这样得到:

library(ggplot2)
library(plotly)
library(datasets)
data(airquality)
# add months
airquality$Month <- factor(airquality$Month,
                           labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names
airquality$Sample <- paste0('Sample_',seq(1:nrow(airquality)))
# boxplot
gg <- ggplot(airquality, aes(x = Month, y = Ozone)) +
  geom_boxplot()
ggly <- ggplotly(gg)
# add hover info
hoverinfo <- with(airquality, paste0("sample: ", Sample, "</br></br>", 
                                     "month: ", Month, "</br>",
                                     "ozone: ", Ozone))
ggly$x$data[[1]]$text <- hoverinfo
ggly$x$data[[1]]$hoverinfo <- c("text", "boxes")

ggly

不幸的是,悬停不适用于第一个箱线图...

我已经通过 Shiny 实现了这一目标。

library(plotly)
library(shiny)
library(htmlwidgets)
library(datasets)

# Prepare data ----
data(airquality)
# add months
airquality$Month <- factor(airquality$Month,
                           labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names
airquality$Sample <- paste0('Sample_', seq(1:nrow(airquality)))

# Plotly on hover event ----
addHoverBehavior <- c(
  "function(el, x){",
  "  el.on('plotly_hover', function(data) {",
  "    if(data.points.length==1){",
  "      $('.hovertext').hide();",
  "      Shiny.setInputValue('hovering', true);",
  "      var d = data.points[0];",
  "      Shiny.setInputValue('left_px', d.xaxis.d2p(d.x) + d.xaxis._offset);",
  "      Shiny.setInputValue('top_px', d.yaxis.l2p(d.y) + d.yaxis._offset);",
  "      Shiny.setInputValue('dx', d.x);",
  "      Shiny.setInputValue('dy', d.y);",
  "      Shiny.setInputValue('dtext', d.text);",
  "    }",
  "  });",
  "  el.on('plotly_unhover', function(data) {",
  "    Shiny.setInputValue('hovering', false);",
  "  });",
  "}")

# Shiny app ----
ui <- fluidPage(
  tags$head(
    # style for the tooltip with an arrow (http://www.cssarrowplease.com/)
    tags$style("
               .arrow_box {
                    position: absolute;
                  pointer-events: none;
                  z-index: 100;
                  white-space: nowrap;
                  background: rgb(54,57,64);
                  color: white;
                  font-size: 14px;
                  border: 1px solid;
                  border-color: rgb(54,57,64);
                  border-radius: 1px;
               }
               .arrow_box:after, .arrow_box:before {
                  right: 100%;
                  top: 50%;
                  border: solid transparent;
                  content: ' ';
                  height: 0;
                  width: 0;
                  position: absolute;
                  pointer-events: none;
               }
               .arrow_box:after {
                  border-color: rgba(136, 183, 213, 0);
                  border-right-color: rgb(54,57,64);
                  border-width: 4px;
                  margin-top: -4px;
               }
               .arrow_box:before {
                  border-color: rgba(194, 225, 245, 0);
                  border-right-color: rgb(54,57,64);
                  border-width: 10px;
                  margin-top: -10px;
               }")
  ),
  div(
    style = "position:relative",
    plotlyOutput("myplot"),
    uiOutput("hover_info")
  )
)

server <- function(input, output){
  output$myplot <- renderPlotly({
    airquality[[".id"]] <- seq_len(nrow(airquality))
    gg <- ggplot(airquality, aes(x=Month, y=Ozone, ids=.id)) + geom_boxplot()
    ggly <- ggplotly(gg, tooltip = "y")
    ids <- ggly$x$data[[1]]$ids
    ggly$x$data[[1]]$text <- 
      with(airquality, paste0("<b> sample: </b>", Sample, "<br/>",
                              "<b> month: </b>", Month, "<br/>",
                              "<b> ozone: </b>", Ozone))[ids]
    ggly %>% onRender(addHoverBehavior)
  })
  output$hover_info <- renderUI({
    if(isTRUE(input[["hovering"]])){
      style <- paste0("left: ", input[["left_px"]] + 4 + 5, "px;", # 4 = border-width after
                      "top: ", input[["top_px"]] - 24 - 2 - 1, "px;") # 24 = line-height/2 * number of lines; 2 = padding; 1 = border thickness
      div(
        class = "arrow_box", style = style,
        p(HTML(input$dtext), 
          style="margin: 0; padding: 2px; line-height: 16px;")
      )
    }
  })
}

shinyApp(ui = ui, server = server)

此方法将实现相同的结果,但不显示箱线图汇总统计悬停。删除离群值并悬停在箱线图层上,并覆盖仅包含悬停信息的离群值的 geom_point 层。 plotly 异常值的定义在 here 中说明。在处理更复杂的图形(例如并排分组的箱线图)时,此方法比其他解决方案更有效。有趣的是,此数据的 ggplotly boxplot 图与 ggplot 图不同。 ggplotly 中 Aug 的上围栏胡须比 8 月的 ggplot 上围栏胡须延伸得更远。

library(dplyr)
library(plotly)
library(datasets)
library(ggplot2)
data(airquality)

# manipulate data
mydata = airquality %>% 
    # add months
    mutate(Month = factor(airquality$Month,labels = c("May", "Jun", "Jul", "Aug", "Sep")),
    # add sample names
           Sample = paste0('Sample_',seq(1:n())))%>%
    # label if outlier sample by Month
    group_by(Month) %>% 
    mutate(OutlierFlag = ifelse((Ozone<quantile(Ozone,1/3,na.rm=T)-1.5*IQR(Ozone,na.rm=T)) | (Ozone>quantile(Ozone,2/3,na.rm=T)+1.5*IQR(Ozone,na.rm=T)),'Outlier','NotOutlier'))%>%
    group_by()


# boxplot
p <- ggplot(mydata, aes(x = Month, y = Ozone)) +
    geom_boxplot()+
    geom_point(data=mydata %>% filter(OutlierFlag=="Outlier"),aes(group=Month,label1=Sample,label2=Ozone),size=2)

output = ggplotly(p, tooltip=c("label1","label2"))

# makes boxplot outliers invisible and hover info off
for (i in 1:length(output$x$data)){
    if (output$x$data[[i]]$type=="box"){
        output$x$data[[i]]$marker$opacity = 0  
        output$x$data[[i]]$hoverinfo = "none"
    }
}

# print end result of plotly graph
output