R 中的 TryCatch - 循环网络抓取
TryCatch in R - loop webscraping
我正在使用 RSelenium 尝试通过网络抓取多个 URL 来提取多年来的价格信息。我的问题是某些 URLs 可能不存在(我用我需要信息的年份生成了 URLs),我需要跳过它并转到下一个 URL ]马不停蹄
我认为 tryCatch()
会有所帮助,但我不知道如何使用它:
base = "https://www.cochrane.org"
codes_test = list("03040300")
month_ = c("01", "02", "03", "04","05","06","07","08", "09", "10", "11","12")
year_ = c(2008:2019)
html <- apply(expand.grid(base, codes_test, month_, year_),
MARGIN = 1,
FUN = function(x)paste(x, collapse = "/"))
remDr$navigate("https://www.cochrane.org/0304070017/10/2017")
webElement <- remDr$findElement(value = '//*[@id="acessoAutomatico"]/a')
webElement$clickElement()
l <-length(html)
for(j in seq(html)){
sigtap <- foreach(i=1:l) %dopar% {
tryCatch(stop("no"), error = function(e) cat("Error: ",e$message, "\n"))
remDr$navigate(html[i])
names <- remDr$findElements(value = ' //*[@id="content"]/fieldset[4]/fieldset/table/tbody/tr[2]/td[1]/label | //*[@id="content"]/fieldset[4]/fieldset/table/tbody/tr[1]/td[3]/label | //*[@id="content"]/fieldset[4]/fieldset/table/tbody/tr[2]/td[3]/label | //*[@id="content"]/fieldset[4]/fieldset/table/tbody/tr[3]/td[3]/label ' )
infos <- remDr$findElements(value = '[@id="valorSA_Total"] | //*[@id="valorSH"] | //*[@id="valorSP"] | //*[@id="totalInternacao"]')
identificadores <- unlist(lapply(names, function(x) {x$getElementText()}))
informacoes <- unlist(lapply(infos, function(x) {x$getElementText()}))
bind_test[[i]] <- data.frame(identificadores , informacoes)
}}
write.csv(bind_test[[i]], file = paste(bind_test, '.csv', sep = '_'))
感谢大家的帮助!
假设 remDr$navigate(html[i])
会抛出您要捕获的错误,请尝试如下操作:
success <- tryCatch({
remDr$navigate(html[i])
TRUE
},
warning = function(w) { FALSE },
error = function(e) { FALSE },
finally = { })
if (!success) next
我正在使用 RSelenium 尝试通过网络抓取多个 URL 来提取多年来的价格信息。我的问题是某些 URLs 可能不存在(我用我需要信息的年份生成了 URLs),我需要跳过它并转到下一个 URL ]马不停蹄
我认为 tryCatch()
会有所帮助,但我不知道如何使用它:
base = "https://www.cochrane.org"
codes_test = list("03040300")
month_ = c("01", "02", "03", "04","05","06","07","08", "09", "10", "11","12")
year_ = c(2008:2019)
html <- apply(expand.grid(base, codes_test, month_, year_),
MARGIN = 1,
FUN = function(x)paste(x, collapse = "/"))
remDr$navigate("https://www.cochrane.org/0304070017/10/2017")
webElement <- remDr$findElement(value = '//*[@id="acessoAutomatico"]/a')
webElement$clickElement()
l <-length(html)
for(j in seq(html)){
sigtap <- foreach(i=1:l) %dopar% {
tryCatch(stop("no"), error = function(e) cat("Error: ",e$message, "\n"))
remDr$navigate(html[i])
names <- remDr$findElements(value = ' //*[@id="content"]/fieldset[4]/fieldset/table/tbody/tr[2]/td[1]/label | //*[@id="content"]/fieldset[4]/fieldset/table/tbody/tr[1]/td[3]/label | //*[@id="content"]/fieldset[4]/fieldset/table/tbody/tr[2]/td[3]/label | //*[@id="content"]/fieldset[4]/fieldset/table/tbody/tr[3]/td[3]/label ' )
infos <- remDr$findElements(value = '[@id="valorSA_Total"] | //*[@id="valorSH"] | //*[@id="valorSP"] | //*[@id="totalInternacao"]')
identificadores <- unlist(lapply(names, function(x) {x$getElementText()}))
informacoes <- unlist(lapply(infos, function(x) {x$getElementText()}))
bind_test[[i]] <- data.frame(identificadores , informacoes)
}}
write.csv(bind_test[[i]], file = paste(bind_test, '.csv', sep = '_'))
感谢大家的帮助!
假设 remDr$navigate(html[i])
会抛出您要捕获的错误,请尝试如下操作:
success <- tryCatch({
remDr$navigate(html[i])
TRUE
},
warning = function(w) { FALSE },
error = function(e) { FALSE },
finally = { })
if (!success) next