正在运行的 R Shiny 应用程序出现问题:来自 https url 的网络抓取在 RStudio 中运行,但在部署到 shinyapps.io 时不再运行

Problem with R Shiny app that was working: web scrape from https url works in RStudio, but no longer works when deployed to shinyapps.io

我写了一个闪亮的应用程序,它可以从 https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx 读取数据,它运行了几个月,但大约一个月前它在 shinyapps.io[ 停止工作了=32=].

我最近发现 post here 表明问题是由于最近过期的 SSL 证书引起的。该网站 green2.kingcounty.gov 的证书确实已于 2020 年 5 月 30 日过期

x <- openssl::download_ssl_cert("green2.kincounty.gov")
lapply(x, `[[`, "validity")

然而,正如 weizhang 在最近提到的 post 中指出的那样,抓取(在这种情况下使用 GET)在 RStudio 中本地工作,但在 [= 的部署版本中不工作35=]。我的代码的 shinyapps.io 日志包含一个警告,然后是一个错误:

2020-07-17T16:09:23.073301+00:00 shinyapps[2571330]: Warning: Error in open.connection: SSL certificate problem: certificate has expired
2020-07-17T16:09:23.077027+00:00 shinyapps[2571330]:   68: open.connection
2020-07-17T16:09:23.077213+00:00 shinyapps[2571330]: Error in open.connection(x, "rb") : 
2020-07-17T16:09:23.077028+00:00 shinyapps[2571330]:   66: read_xml.connection
2020-07-17T16:09:23.077214+00:00 shinyapps[2571330]:   SSL certificate problem: certificate has expired
2020-07-17T16:09:23.077029+00:00 shinyapps[2571330]:   65: read_xml.character
2020-07-17T16:09:23.077029+00:00 shinyapps[2571330]:   61: read_html.default
2020-07-17T16:09:23.077030+00:00 shinyapps[2571330]:   59: server [/srv/connect/apps/shiny_test/app.R#25]

看来community.rstudio.com中的讨论从6月4日开始就一直处于休眠状态,希望能在这里找到解决这个问题的方法。

下面提供了我的应用程序的简单版本。

library(shiny)
library(tidyverse)
library(lubridate)
library(rvest)

# Define UI for application that gets data and creates a plot
ui <- fluidPage(

    # Application title
    titlePanel("Large Lakes Profile Plots"),

        # Show a plot of the data
        mainPanel(
           plotOutput("lakePlot")
        )
    )
# )

# Define server logic required to draw a histogram
server <- function(input, output) {
    
    mnths <- c("January","February","March","April","May","June","July","August","September","October","November","December")
    
    url <- paste("https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx?type=profile&buoy=wa&year=2020&month=6")
    webpage <- read_html(url)
    tbls_ls <- webpage %>%
        html_nodes("table") %>%
        .[1:1] %>%
        html_table(fill = TRUE)
    data <- as.data.frame(tbls_ls)
    
    data$DateTime <- as.POSIXct(data$Date, format="%m/%d/%Y %H:%M:%S %p")
    data$Date <- as.Date(data$DateTime)
    data$Locator <- "Washington"
    data <- data %>% rename(Depth="Depth..m.",Temperature="Temperature...C.",
                                Conductance="Specific.Conductance..µS.cm.",`Dissolved Oxygen`="DO.Concentration..mg.l.",
                                `DO Saturation`="DO.Saturation....",`Chlorophyll, Field`="Chlorophyll..µg.l.",
                                Turbidity="Turbidity..NTU.",`Phycocyanin, Field`="Phycocyanin..µg.l.")
    nms <- names(data)
    data <- data %>% gather(nms[3:10],key="ParmDisplayName",value="Value")

    output$lakePlot <- renderPlot({

        xlabel <- "Temperature"
        tmp <- data %>% filter(ParmDisplayName==xlabel)
        title <- paste(tmp$Locator[1],xlabel,"in",mnths[as.numeric(month(tmp$Date[1]))],year(tmp$Date[1]),sep=" ")
        mrged2 <- tmp[1:days_in_month(as.numeric(month(tmp$Date[1]))),]
        mrged2$Date <- seq(as.Date(paste(year(tmp$Date[1]),month(tmp$Date[1]),1,sep="-")), as.Date(paste(year(tmp$Date[1]),month(tmp$Date[1]),days_in_month(as.numeric(month(tmp$Date[1]))),sep="-")), by = "days")
        mrged2$Depth <- NA
        mrged2$Value <- NA
        #  
        tmp <- rbind(tmp,mrged2)
        # 
        ggplot(tmp, aes(x=Value,y=Depth,color=Locator)) +
            geom_point() +  scale_y_reverse() + facet_wrap(~Date) +
            xlim(0,30) + xlab("") +
            ggtitle(title) 
    })
}

# Run the application 
shinyApp(ui = ui, server = server)

有点怪异的是,我进入此页面是为了寻找同一问题的解决方案,同时也在尝试抓取金县信息。我会继续寻找,如果我发现有用的东西,我会 post 回来。

一位同事提供了一个使用 curl 的解决方案(不是一个理想的解决方案,因为它禁用了 SSL 证书的验证,但它有效)。至少部分解决方案由 Cole(RStudio 员工)发布here

library(shiny)
library(tidyverse)
library(lubridate)
library(rvest)

# add this function
geturl <- function(url,handle) {
    curl::curl_fetch_memory(url, handle = handle)$content
}

# Define UI for application that gets data and creates a plot
ui <- fluidPage(

    # Application title
    titlePanel("Large Lakes Profile Plots"),

        # Show a plot of the data
        mainPanel(
           plotOutput("lakePlot")
        )
    )
# )

# Define server logic required to draw a histogram
server <- function(input, output) {

    mnths <- c("January","February","March","April","May","June","July","August","September","October","November","December")
    
    url <- paste("https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx?type=profile&buoy=wa&year=2020&month=6")
    
    # add next two lines 
    h <- curl::new_handle()
    curl::handle_setopt(h, ssl_verifypeer = 0)

    # webpage <- read_html(url)
    # add next two lines to replace line above
    webpage <- read_html(geturl(url,h))
    rm(h)    
    #
    tbls_ls <- webpage %>%
        html_nodes("table") %>%
        .[1:1] %>%
        html_table(fill = TRUE)
    data <- as.data.frame(tbls_ls)
    
    data$DateTime <- as.POSIXct(data$Date, format="%m/%d/%Y %H:%M:%S %p")
    data$Date <- as.Date(data$DateTime)
    data$Locator <- "Washington"
    data <- data %>% rename(Depth="Depth..m.",Temperature="Temperature...C.",
                                Conductance="Specific.Conductance..µS.cm.",`Dissolved Oxygen`="DO.Concentration..mg.l.",
                                `DO Saturation`="DO.Saturation....",`Chlorophyll, Field`="Chlorophyll..µg.l.",
                                Turbidity="Turbidity..NTU.",`Phycocyanin, Field`="Phycocyanin..µg.l.")
    nms <- names(data)
    data <- data %>% gather(nms[3:10],key="ParmDisplayName",value="Value")

    output$lakePlot <- renderPlot({

        xlabel <- "Temperature"
        tmp <- data %>% filter(ParmDisplayName==xlabel)
        title <- paste(tmp$Locator[1],xlabel,"in",mnths[as.numeric(month(tmp$Date[1]))],year(tmp$Date[1]),sep=" ")
        mrged2 <- tmp[1:days_in_month(as.numeric(month(tmp$Date[1]))),]
        mrged2$Date <- seq(as.Date(paste(year(tmp$Date[1]),month(tmp$Date[1]),1,sep="-")), as.Date(paste(year(tmp$Date[1]),month(tmp$Date[1]),days_in_month(as.numeric(month(tmp$Date[1]))),sep="-")), by = "days")
        mrged2$Depth <- NA
        mrged2$Value <- NA
        #  
        tmp <- rbind(tmp,mrged2)
        # 
        ggplot(tmp, aes(x=Value,y=Depth,color=Locator)) +
            geom_point() +  scale_y_reverse() + facet_wrap(~Date) +
            xlim(0,30) + xlab("") +
            ggtitle(title) 
    })
}

# Run the application 
shinyApp(ui = ui, server = server)