正在运行的 R Shiny 应用程序出现问题:来自 https url 的网络抓取在 RStudio 中运行,但在部署到 shinyapps.io 时不再运行
Problem with R Shiny app that was working: web scrape from https url works in RStudio, but no longer works when deployed to shinyapps.io
我写了一个 Shiny 应用程序,它从 https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx 读取数据。该应用正常运行了几个月,但大约一个月前在 shinyapps.io 上停止工作了。
我最近发现一篇帖子指出,这个问题是由最近过期的 SSL 证书引起的。该网站 green2.kingcounty.gov 的证书确实已于 2020 年 5 月 30 日过期:
# Download the site's certificate chain and print each certificate's
# validity window (confirms the leaf cert expired 2020-05-30).
# Fixed hostname typo: "kincounty" -> "kingcounty".
x <- openssl::download_ssl_cert("green2.kingcounty.gov")
lapply(x, `[[`, "validity")
然而,正如 weizhang 在上面提到的帖子中指出的那样,抓取(在那个例子中使用 GET)在本地 RStudio 中可以工作,但在部署到 shinyapps.io 的版本中不工作。我的应用在 shinyapps.io 上的日志包含一个警告,随后是一个错误:
2020-07-17T16:09:23.073301+00:00 shinyapps[2571330]: Warning: Error in open.connection: SSL certificate problem: certificate has expired
2020-07-17T16:09:23.077027+00:00 shinyapps[2571330]: 68: open.connection
2020-07-17T16:09:23.077213+00:00 shinyapps[2571330]: Error in open.connection(x, "rb") :
2020-07-17T16:09:23.077028+00:00 shinyapps[2571330]: 66: read_xml.connection
2020-07-17T16:09:23.077214+00:00 shinyapps[2571330]: SSL certificate problem: certificate has expired
2020-07-17T16:09:23.077029+00:00 shinyapps[2571330]: 65: read_xml.character
2020-07-17T16:09:23.077029+00:00 shinyapps[2571330]: 61: read_html.default
2020-07-17T16:09:23.077030+00:00 shinyapps[2571330]: 59: server [/srv/connect/apps/shiny_test/app.R#25]
看来community.rstudio.com
中的讨论从6月4日开始就一直处于休眠状态,希望能在这里找到解决这个问题的方法。
下面提供了我的应用程序的简单版本。
library(shiny)
library(tidyverse)
library(lubridate)
library(rvest)

# UI: a title plus a single panel holding the lake profile plot.
ui <- fluidPage(
  titlePanel("Large Lakes Profile Plots"),
  mainPanel(
    plotOutput("lakePlot")
  )
)

# Server: scrape the King County buoy profile table once at startup,
# tidy it into long format, then render a faceted depth-profile plot.
server <- function(input, output) {
  # Built-in vector of English month names (identical to the hand-rolled one).
  mnths <- month.name

  url <- "https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx?type=profile&buoy=wa&year=2020&month=6"
  webpage <- read_html(url)

  # The page carries one data table; keep only the first <table> node.
  tbls_ls <- webpage %>%
    html_nodes("table") %>%
    .[1] %>%
    html_table(fill = TRUE)
  data <- as.data.frame(tbls_ls)

  # BUGFIX: %p (AM/PM) is only honored together with %I (12-hour clock);
  # paired with %H it is silently ignored, so PM times parsed 12 h early.
  # TODO(review): confirm the site emits 12-hour timestamps with AM/PM.
  data$DateTime <- as.POSIXct(data$Date, format = "%m/%d/%Y %I:%M:%S %p")
  data$Date <- as.Date(data$DateTime)
  data$Locator <- "Washington"

  # Replace the mangled column names produced by as.data.frame().
  data <- data %>% rename(
    Depth = "Depth..m.",
    Temperature = "Temperature...C.",
    Conductance = "Specific.Conductance..µS.cm.",
    `Dissolved Oxygen` = "DO.Concentration..mg.l.",
    `DO Saturation` = "DO.Saturation....",
    `Chlorophyll, Field` = "Chlorophyll..µg.l.",
    Turbidity = "Turbidity..NTU.",
    `Phycocyanin, Field` = "Phycocyanin..µg.l."
  )

  # Reshape measurement columns 3-10 to long format
  # (pivot_longer() supersedes gather()).
  nms <- names(data)
  data <- data %>%
    pivot_longer(all_of(nms[3:10]),
                 names_to = "ParmDisplayName", values_to = "Value")

  output$lakePlot <- renderPlot({
    xlabel <- "Temperature"
    tmp <- data %>% filter(ParmDisplayName == xlabel)
    title <- paste(tmp$Locator[1], xlabel, "in",
                   mnths[as.numeric(month(tmp$Date[1]))], year(tmp$Date[1]))

    # Pad with one NA row per calendar day so facet_wrap() shows every
    # day of the month even when a day has no observations.
    n_days <- days_in_month(as.numeric(month(tmp$Date[1])))
    mrged2 <- tmp[seq_len(n_days), ]
    mrged2$Date <- seq(
      as.Date(paste(year(tmp$Date[1]), month(tmp$Date[1]), 1, sep = "-")),
      as.Date(paste(year(tmp$Date[1]), month(tmp$Date[1]), n_days, sep = "-")),
      by = "days"
    )
    mrged2$Depth <- NA
    mrged2$Value <- NA
    tmp <- rbind(tmp, mrged2)

    ggplot(tmp, aes(x = Value, y = Depth, color = Locator)) +
      geom_point() +
      scale_y_reverse() +
      facet_wrap(~Date) +
      xlim(0, 30) +
      xlab("") +
      ggtitle(title)
  })
}

# Run the application
shinyApp(ui = ui, server = server)
有点怪异的是,我进入此页面是为了寻找同一问题的解决方案,同时也在尝试抓取金县信息。我会继续寻找,如果我发现有用的东西,我会 post 回来。
一位同事提供了一个使用 curl 的解决方法(并不理想,因为它禁用了 SSL 证书验证,但确实有效)。该解决方案至少部分基于 RStudio 员工 Cole 在此处发布的内容。
library(shiny)
library(tidyverse)
library(lubridate)
library(rvest)

# Fetch a URL with an explicit curl handle and return the raw body bytes.
# Used to bypass httr/xml2's connection path so SSL options can be set.
geturl <- function(url, handle) {
  curl::curl_fetch_memory(url, handle = handle)$content
}

# UI: a title plus a single panel holding the lake profile plot.
ui <- fluidPage(
  titlePanel("Large Lakes Profile Plots"),
  mainPanel(
    plotOutput("lakePlot")
  )
)

# Server: scrape the King County buoy profile table once at startup,
# tidy it into long format, then render a faceted depth-profile plot.
server <- function(input, output) {
  # Built-in vector of English month names (identical to the hand-rolled one).
  mnths <- month.name

  url <- "https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx?type=profile&buoy=wa&year=2020&month=6"

  # SECURITY WORKAROUND: the site's certificate expired 2020-05-30, so peer
  # verification is disabled here. This removes man-in-the-middle protection;
  # re-enable verification once the certificate is renewed.
  h <- curl::new_handle()
  curl::handle_setopt(h, ssl_verifypeer = 0)
  webpage <- read_html(geturl(url, h))

  # The page carries one data table; keep only the first <table> node.
  tbls_ls <- webpage %>%
    html_nodes("table") %>%
    .[1] %>%
    html_table(fill = TRUE)
  data <- as.data.frame(tbls_ls)

  # BUGFIX: %p (AM/PM) is only honored together with %I (12-hour clock);
  # paired with %H it is silently ignored, so PM times parsed 12 h early.
  # TODO(review): confirm the site emits 12-hour timestamps with AM/PM.
  data$DateTime <- as.POSIXct(data$Date, format = "%m/%d/%Y %I:%M:%S %p")
  data$Date <- as.Date(data$DateTime)
  data$Locator <- "Washington"

  # Replace the mangled column names produced by as.data.frame().
  data <- data %>% rename(
    Depth = "Depth..m.",
    Temperature = "Temperature...C.",
    Conductance = "Specific.Conductance..µS.cm.",
    `Dissolved Oxygen` = "DO.Concentration..mg.l.",
    `DO Saturation` = "DO.Saturation....",
    `Chlorophyll, Field` = "Chlorophyll..µg.l.",
    Turbidity = "Turbidity..NTU.",
    `Phycocyanin, Field` = "Phycocyanin..µg.l."
  )

  # Reshape measurement columns 3-10 to long format
  # (pivot_longer() supersedes gather()).
  nms <- names(data)
  data <- data %>%
    pivot_longer(all_of(nms[3:10]),
                 names_to = "ParmDisplayName", values_to = "Value")

  output$lakePlot <- renderPlot({
    xlabel <- "Temperature"
    tmp <- data %>% filter(ParmDisplayName == xlabel)
    title <- paste(tmp$Locator[1], xlabel, "in",
                   mnths[as.numeric(month(tmp$Date[1]))], year(tmp$Date[1]))

    # Pad with one NA row per calendar day so facet_wrap() shows every
    # day of the month even when a day has no observations.
    n_days <- days_in_month(as.numeric(month(tmp$Date[1])))
    mrged2 <- tmp[seq_len(n_days), ]
    mrged2$Date <- seq(
      as.Date(paste(year(tmp$Date[1]), month(tmp$Date[1]), 1, sep = "-")),
      as.Date(paste(year(tmp$Date[1]), month(tmp$Date[1]), n_days, sep = "-")),
      by = "days"
    )
    mrged2$Depth <- NA
    mrged2$Value <- NA
    tmp <- rbind(tmp, mrged2)

    ggplot(tmp, aes(x = Value, y = Depth, color = Locator)) +
      geom_point() +
      scale_y_reverse() +
      facet_wrap(~Date) +
      xlim(0, 30) +
      xlab("") +
      ggtitle(title)
  })
}

# Run the application
shinyApp(ui = ui, server = server)
我写了一个 Shiny 应用程序,它从 https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx 读取数据。该应用正常运行了几个月,但大约一个月前在 shinyapps.io 上停止工作了。
我最近发现一篇帖子指出,这个问题是由最近过期的 SSL 证书引起的。该网站 green2.kingcounty.gov 的证书确实已于 2020 年 5 月 30 日过期:
# Download the site's certificate chain and print each certificate's
# validity window (confirms the leaf cert expired 2020-05-30).
# Fixed hostname typo: "kincounty" -> "kingcounty".
x <- openssl::download_ssl_cert("green2.kingcounty.gov")
lapply(x, `[[`, "validity")
然而,正如 weizhang 在上面提到的帖子中指出的那样,抓取(在那个例子中使用 GET)在本地 RStudio 中可以工作,但在部署到 shinyapps.io 的版本中不工作。我的应用在 shinyapps.io 上的日志包含一个警告,随后是一个错误:
2020-07-17T16:09:23.073301+00:00 shinyapps[2571330]: Warning: Error in open.connection: SSL certificate problem: certificate has expired
2020-07-17T16:09:23.077027+00:00 shinyapps[2571330]: 68: open.connection
2020-07-17T16:09:23.077213+00:00 shinyapps[2571330]: Error in open.connection(x, "rb") :
2020-07-17T16:09:23.077028+00:00 shinyapps[2571330]: 66: read_xml.connection
2020-07-17T16:09:23.077214+00:00 shinyapps[2571330]: SSL certificate problem: certificate has expired
2020-07-17T16:09:23.077029+00:00 shinyapps[2571330]: 65: read_xml.character
2020-07-17T16:09:23.077029+00:00 shinyapps[2571330]: 61: read_html.default
2020-07-17T16:09:23.077030+00:00 shinyapps[2571330]: 59: server [/srv/connect/apps/shiny_test/app.R#25]
看来community.rstudio.com
中的讨论从6月4日开始就一直处于休眠状态,希望能在这里找到解决这个问题的方法。
下面提供了我的应用程序的简单版本。
library(shiny)
library(tidyverse)
library(lubridate)
library(rvest)

# UI: a title plus a single panel holding the lake profile plot.
ui <- fluidPage(
  titlePanel("Large Lakes Profile Plots"),
  mainPanel(
    plotOutput("lakePlot")
  )
)

# Server: scrape the King County buoy profile table once at startup,
# tidy it into long format, then render a faceted depth-profile plot.
server <- function(input, output) {
  # Built-in vector of English month names (identical to the hand-rolled one).
  mnths <- month.name

  url <- "https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx?type=profile&buoy=wa&year=2020&month=6"
  webpage <- read_html(url)

  # The page carries one data table; keep only the first <table> node.
  tbls_ls <- webpage %>%
    html_nodes("table") %>%
    .[1] %>%
    html_table(fill = TRUE)
  data <- as.data.frame(tbls_ls)

  # BUGFIX: %p (AM/PM) is only honored together with %I (12-hour clock);
  # paired with %H it is silently ignored, so PM times parsed 12 h early.
  # TODO(review): confirm the site emits 12-hour timestamps with AM/PM.
  data$DateTime <- as.POSIXct(data$Date, format = "%m/%d/%Y %I:%M:%S %p")
  data$Date <- as.Date(data$DateTime)
  data$Locator <- "Washington"

  # Replace the mangled column names produced by as.data.frame().
  data <- data %>% rename(
    Depth = "Depth..m.",
    Temperature = "Temperature...C.",
    Conductance = "Specific.Conductance..µS.cm.",
    `Dissolved Oxygen` = "DO.Concentration..mg.l.",
    `DO Saturation` = "DO.Saturation....",
    `Chlorophyll, Field` = "Chlorophyll..µg.l.",
    Turbidity = "Turbidity..NTU.",
    `Phycocyanin, Field` = "Phycocyanin..µg.l."
  )

  # Reshape measurement columns 3-10 to long format
  # (pivot_longer() supersedes gather()).
  nms <- names(data)
  data <- data %>%
    pivot_longer(all_of(nms[3:10]),
                 names_to = "ParmDisplayName", values_to = "Value")

  output$lakePlot <- renderPlot({
    xlabel <- "Temperature"
    tmp <- data %>% filter(ParmDisplayName == xlabel)
    title <- paste(tmp$Locator[1], xlabel, "in",
                   mnths[as.numeric(month(tmp$Date[1]))], year(tmp$Date[1]))

    # Pad with one NA row per calendar day so facet_wrap() shows every
    # day of the month even when a day has no observations.
    n_days <- days_in_month(as.numeric(month(tmp$Date[1])))
    mrged2 <- tmp[seq_len(n_days), ]
    mrged2$Date <- seq(
      as.Date(paste(year(tmp$Date[1]), month(tmp$Date[1]), 1, sep = "-")),
      as.Date(paste(year(tmp$Date[1]), month(tmp$Date[1]), n_days, sep = "-")),
      by = "days"
    )
    mrged2$Depth <- NA
    mrged2$Value <- NA
    tmp <- rbind(tmp, mrged2)

    ggplot(tmp, aes(x = Value, y = Depth, color = Locator)) +
      geom_point() +
      scale_y_reverse() +
      facet_wrap(~Date) +
      xlim(0, 30) +
      xlab("") +
      ggtitle(title)
  })
}

# Run the application
shinyApp(ui = ui, server = server)
有点怪异的是,我进入此页面是为了寻找同一问题的解决方案,同时也在尝试抓取金县信息。我会继续寻找,如果我发现有用的东西,我会 post 回来。
一位同事提供了一个使用 curl 的解决方法(并不理想,因为它禁用了 SSL 证书验证,但确实有效)。该解决方案至少部分基于 RStudio 员工 Cole 在此处发布的内容。
library(shiny)
library(tidyverse)
library(lubridate)
library(rvest)

# Fetch a URL with an explicit curl handle and return the raw body bytes.
# Used to bypass httr/xml2's connection path so SSL options can be set.
geturl <- function(url, handle) {
  curl::curl_fetch_memory(url, handle = handle)$content
}

# UI: a title plus a single panel holding the lake profile plot.
ui <- fluidPage(
  titlePanel("Large Lakes Profile Plots"),
  mainPanel(
    plotOutput("lakePlot")
  )
)

# Server: scrape the King County buoy profile table once at startup,
# tidy it into long format, then render a faceted depth-profile plot.
server <- function(input, output) {
  # Built-in vector of English month names (identical to the hand-rolled one).
  mnths <- month.name

  url <- "https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx?type=profile&buoy=wa&year=2020&month=6"

  # SECURITY WORKAROUND: the site's certificate expired 2020-05-30, so peer
  # verification is disabled here. This removes man-in-the-middle protection;
  # re-enable verification once the certificate is renewed.
  h <- curl::new_handle()
  curl::handle_setopt(h, ssl_verifypeer = 0)
  webpage <- read_html(geturl(url, h))

  # The page carries one data table; keep only the first <table> node.
  tbls_ls <- webpage %>%
    html_nodes("table") %>%
    .[1] %>%
    html_table(fill = TRUE)
  data <- as.data.frame(tbls_ls)

  # BUGFIX: %p (AM/PM) is only honored together with %I (12-hour clock);
  # paired with %H it is silently ignored, so PM times parsed 12 h early.
  # TODO(review): confirm the site emits 12-hour timestamps with AM/PM.
  data$DateTime <- as.POSIXct(data$Date, format = "%m/%d/%Y %I:%M:%S %p")
  data$Date <- as.Date(data$DateTime)
  data$Locator <- "Washington"

  # Replace the mangled column names produced by as.data.frame().
  data <- data %>% rename(
    Depth = "Depth..m.",
    Temperature = "Temperature...C.",
    Conductance = "Specific.Conductance..µS.cm.",
    `Dissolved Oxygen` = "DO.Concentration..mg.l.",
    `DO Saturation` = "DO.Saturation....",
    `Chlorophyll, Field` = "Chlorophyll..µg.l.",
    Turbidity = "Turbidity..NTU.",
    `Phycocyanin, Field` = "Phycocyanin..µg.l."
  )

  # Reshape measurement columns 3-10 to long format
  # (pivot_longer() supersedes gather()).
  nms <- names(data)
  data <- data %>%
    pivot_longer(all_of(nms[3:10]),
                 names_to = "ParmDisplayName", values_to = "Value")

  output$lakePlot <- renderPlot({
    xlabel <- "Temperature"
    tmp <- data %>% filter(ParmDisplayName == xlabel)
    title <- paste(tmp$Locator[1], xlabel, "in",
                   mnths[as.numeric(month(tmp$Date[1]))], year(tmp$Date[1]))

    # Pad with one NA row per calendar day so facet_wrap() shows every
    # day of the month even when a day has no observations.
    n_days <- days_in_month(as.numeric(month(tmp$Date[1])))
    mrged2 <- tmp[seq_len(n_days), ]
    mrged2$Date <- seq(
      as.Date(paste(year(tmp$Date[1]), month(tmp$Date[1]), 1, sep = "-")),
      as.Date(paste(year(tmp$Date[1]), month(tmp$Date[1]), n_days, sep = "-")),
      by = "days"
    )
    mrged2$Depth <- NA
    mrged2$Value <- NA
    tmp <- rbind(tmp, mrged2)

    ggplot(tmp, aes(x = Value, y = Depth, color = Locator)) +
      geom_point() +
      scale_y_reverse() +
      facet_wrap(~Date) +
      xlim(0, 30) +
      xlab("") +
      ggtitle(title)
  })
}

# Run the application
shinyApp(ui = ui, server = server)