如何使用 VBA 和 Chromedriver 优化等待方法
How to optimize the wait method using VBA and Chromedriver
在这个主页“http://www.kpia.or.kr/index.php/year_sugub”
如果你检查html,从li1到li6有6个id。第一次使用 chromedriver 后,我首先注意到的是 wait 方法无效。于是在网上搜索了各种优化点击后等待的方法在这个主页上使用。
例如,我应用了以下三种编码。
ex1)
Application.Wait 现在 + TimeSerial (0, 0, 5)
ex2)
.FindElementById("li2",超时:=10000).Click
ex3)
'做
'做事件
'出错继续下一步
'设置 ele = .FindElementById ("li2")
'出错时转到 0
'If Timer - t = 10 Then Exit Do' <== 避免死循环
'当 ele 什么都不是时循环
但是,不使用Application.WaitNow + TimeSerial(0, 0, 5),我们最终找不到优化等待方法的方法。这个方法点击li2后没有完全加载,但是偶尔会执行额外的任务
所以,我想到了一个形式化的编码逻辑,以后偶尔可以用来做类似的编码,于是想出了如下逻辑。例如,在li2中,Ethylene值始终是与结果值固定的值,因此如果您点击li2,然后查找"SM"值,数据将被加载到sheet中。接下来li3中的"LDPE"就是加载完成后将数据粘贴到sheet中的方式。所以我正在用这个想法编码,但我在处理 VBA.
时无法解决错误
Dim d As WebDriver, ws As Worksheet, clipboard As Object
Set d = New ChromeDriver
Set ws = ThisWorkbook.Worksheets("Sheet3")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Const URL = "http://www.kpia.or.kr/index.php/year_sugub"
Dim html As HTMLDocument
Set html = New HTMLDocument
With d
.AddArgument "--headless"
.Start "Chrome"
.get URL, Raise:=False
rep:
.FindElementById("li2", timeout:=10000).Click
Dim Posts As WebElements
Dim elem As WebElements
Dim a1 As Integer
For Each Posts In .FindElementsByClass("bbs")
For Each elem In Posts.FindElementsByCss("td")
If Not elem.Text = "SM" Is Nothing Then
html.body.innerHTML = d.PageSource
Dim tarTable As HTMLTable
Dim hTable As HTMLTable
For Each tarTable In html.getElementsByTagName("table")
If InStr(tarTable.className, "bbs") <> 0 Then
Set hTable = tarTable
End If
Next
clipboard.SetText .FindElementById("table_body").Attribute("outerText")
clipboard.PutInClipboard
else
goto rep
end if
.Quit
End With
如果找到与SM值匹配的值,则认为加载完成并继续将相关数据传输到剪贴板。如果找不到 SM 值,则使用 GOTO 来使用 .FindElementById ("li2" timeout:=10000)。我想我可以通过创建一个从 .Click 重新启动的循环来修复它。
我是一个初学者,在读书的过程中节省时间,努力学习,如果您能给我更多的帮助,我将不胜感激。
我会完全避免使用浏览器并发出 XMLHTTP POST 请求并解析 XML 响应以写出到 sheet。在覆盖每个选项卡的 gubun 代码循环中执行此操作,即 gubun=1 到 6。
Option Explicit
Public Sub GetTable()
Dim sResponse As String, body As String, columnToWriteOut As Long, gubunNumber As Long
Dim xmlDoc As Object
Set xmlDoc = CreateObject("MSXML2.DOMDocument") 'New MSXML2.DOMDocument60
columnToWriteOut = 1
With CreateObject("MSXML2.XMLHTTP")
For gubunNumber = 1 To 6
body = "gubun=" & CStr(gubunNumber)
.Open "POST", "http://www.kpia.or.kr/index.php/year_sugub/get_year_sugub", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.setRequestHeader "Content-Length", Len(body)
.send body
sResponse = .responseText
With xmlDoc
.validateOnParse = True
.setProperty "SelectionLanguage", "XPath"
.async = False
If Not .LoadXML(sResponse) Then
Err.Raise .parseError.ErrorCode, , .parseError.reason
End If
End With
Dim startYear As Long, endYear As Long, numColumns As Long, numRows As Long, data()
Dim node As Object, nextNode As Object, headers(), i As Long
startYear = xmlDoc.SelectSingleNode("//rec/sy").Text
endYear = xmlDoc.SelectSingleNode("//rec/ey").Text
numRows = xmlDoc.SelectNodes("//product").Length
ReDim headers(1 To endYear - startYear + 3)
numColumns = UBound(headers)
ReDim data(1 To numRows, 1 To numColumns)
headers(1) = "Product": headers(2) = "Category"
For i = 1 To endYear - startYear + 1
headers(i + 2) = startYear + i - 1
Next
Dim r As Long, c As Long, rowCounter As Long
rowCounter = 0
For Each node In xmlDoc.SelectNodes("//rec") ' '//rec/*[not(self::sy) and not(self::ey) and not(self::product)] ?
c = 1: rowCounter = rowCounter + 1
For Each nextNode In node.ChildNodes
Select Case c
Case 3
data(rowCounter, 1) = nextNode.Text
Case Is > 3
data(rowCounter, c - 1) = nextNode.Text
End Select
Select Case rowCounter Mod 4
Case 1
data(rowCounter, 2) = "Production (shipment)"
Case 2
data(rowCounter, 2) = "Export"
Case 3
data(rowCounter, 2) = "income"
Case 0
data(rowCounter, 2) = "Domestic demand "
End Select
c = c + 1
Next
Next
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, columnToWriteOut).Resize(1, UBound(headers)) = headers
.Cells(2, columnToWriteOut).Resize(UBound(data, 1), UBound(data, 2)) = data
End With
columnToWriteOut = columnToWriteOut + UBound(headers) + 2
Next
End With
End Sub
或者您可以循环等待每个 Ajax 调用完成:
Option Explicit
Public Sub GetInfo()
Dim d As WebDriver, ws As Worksheet, clipboard As Object, writeOutColumn As Long
writeOutColumn = 1
Const URL = "http://www.kpia.or.kr/index.php/year_sugub"
Set d = New ChromeDriver
Set ws = ThisWorkbook.Worksheets("Sheet3")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With d
.Start "Chrome"
.get URL
Dim links As Object, i As Long
Set links = .FindElementsByCss("[href*=action_tab]")
For i = 1 To links.Count
If i > 1 Then
links(i).Click
Do
Loop While Not .ExecuteScript("return jQuery.active == 0")
End If
Dim table As Object
Set table = .FindElementByTag("table")
clipboard.SetText table.Attribute("outerHTML")
clipboard.PutInClipboard
ws.Cells(1, writeOutColumn).PasteSpecial
writeOutColumn = writeOutColumn + table.FindElementByTag("tr").FindElementsByTag("td").Count + 2
Set table = Nothing
Next
.Quit
End With
End Sub
在这个主页“http://www.kpia.or.kr/index.php/year_sugub”
如果你检查html,从li1到li6有6个id。第一次使用 chromedriver 后,我首先注意到的是 wait 方法无效。于是在网上搜索了各种优化点击后等待的方法在这个主页上使用。 例如,我应用了以下三种编码。
ex1) Application.Wait 现在 + TimeSerial (0, 0, 5)
ex2) .FindElementById("li2",超时:=10000).Click
ex3) '做 '做事件 '出错继续下一步 '设置 ele = .FindElementById ("li2") '出错时转到 0 'If Timer - t = 10 Then Exit Do' <== 避免死循环 '当 ele 什么都不是时循环
但是,不使用Application.WaitNow + TimeSerial(0, 0, 5),我们最终找不到优化等待方法的方法。这个方法点击li2后没有完全加载,但是偶尔会执行额外的任务
所以,我想到了一个形式化的编码逻辑,以后偶尔可以用来做类似的编码,于是想出了如下逻辑。例如,在li2中,Ethylene值始终是与结果值固定的值,因此如果您点击li2,然后查找"SM"值,数据将被加载到sheet中。接下来li3中的"LDPE"就是加载完成后将数据粘贴到sheet中的方式。所以我正在用这个想法编码,但我在处理 VBA.
时无法解决错误Dim d As WebDriver, ws As Worksheet, clipboard As Object
Set d = New ChromeDriver
Set ws = ThisWorkbook.Worksheets("Sheet3")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Const URL = "http://www.kpia.or.kr/index.php/year_sugub"
Dim html As HTMLDocument
Set html = New HTMLDocument
With d
.AddArgument "--headless"
.Start "Chrome"
.get URL, Raise:=False
rep:
.FindElementById("li2", timeout:=10000).Click
Dim Posts As WebElements
Dim elem As WebElements
Dim a1 As Integer
For Each Posts In .FindElementsByClass("bbs")
For Each elem In Posts.FindElementsByCss("td")
If Not elem.Text = "SM" Is Nothing Then
html.body.innerHTML = d.PageSource
Dim tarTable As HTMLTable
Dim hTable As HTMLTable
For Each tarTable In html.getElementsByTagName("table")
If InStr(tarTable.className, "bbs") <> 0 Then
Set hTable = tarTable
End If
Next
clipboard.SetText .FindElementById("table_body").Attribute("outerText")
clipboard.PutInClipboard
else
goto rep
end if
.Quit
End With
如果找到与SM值匹配的值,则认为加载完成并继续将相关数据传输到剪贴板。如果找不到 SM 值,则使用 GOTO 来使用 .FindElementById ("li2" timeout:=10000)。我想我可以通过创建一个从 .Click 重新启动的循环来修复它。
我是一个初学者,在读书的过程中节省时间,努力学习,如果您能给我更多的帮助,我将不胜感激。
我会完全避免使用浏览器并发出 XMLHTTP POST 请求并解析 XML 响应以写出到 sheet。在覆盖每个选项卡的 gubun 代码循环中执行此操作,即 gubun=1 到 6。
Option Explicit
Public Sub GetTable()
Dim sResponse As String, body As String, columnToWriteOut As Long, gubunNumber As Long
Dim xmlDoc As Object
Set xmlDoc = CreateObject("MSXML2.DOMDocument") 'New MSXML2.DOMDocument60
columnToWriteOut = 1
With CreateObject("MSXML2.XMLHTTP")
For gubunNumber = 1 To 6
body = "gubun=" & CStr(gubunNumber)
.Open "POST", "http://www.kpia.or.kr/index.php/year_sugub/get_year_sugub", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.setRequestHeader "Content-Length", Len(body)
.send body
sResponse = .responseText
With xmlDoc
.validateOnParse = True
.setProperty "SelectionLanguage", "XPath"
.async = False
If Not .LoadXML(sResponse) Then
Err.Raise .parseError.ErrorCode, , .parseError.reason
End If
End With
Dim startYear As Long, endYear As Long, numColumns As Long, numRows As Long, data()
Dim node As Object, nextNode As Object, headers(), i As Long
startYear = xmlDoc.SelectSingleNode("//rec/sy").Text
endYear = xmlDoc.SelectSingleNode("//rec/ey").Text
numRows = xmlDoc.SelectNodes("//product").Length
ReDim headers(1 To endYear - startYear + 3)
numColumns = UBound(headers)
ReDim data(1 To numRows, 1 To numColumns)
headers(1) = "Product": headers(2) = "Category"
For i = 1 To endYear - startYear + 1
headers(i + 2) = startYear + i - 1
Next
Dim r As Long, c As Long, rowCounter As Long
rowCounter = 0
For Each node In xmlDoc.SelectNodes("//rec") ' '//rec/*[not(self::sy) and not(self::ey) and not(self::product)] ?
c = 1: rowCounter = rowCounter + 1
For Each nextNode In node.ChildNodes
Select Case c
Case 3
data(rowCounter, 1) = nextNode.Text
Case Is > 3
data(rowCounter, c - 1) = nextNode.Text
End Select
Select Case rowCounter Mod 4
Case 1
data(rowCounter, 2) = "Production (shipment)"
Case 2
data(rowCounter, 2) = "Export"
Case 3
data(rowCounter, 2) = "income"
Case 0
data(rowCounter, 2) = "Domestic demand "
End Select
c = c + 1
Next
Next
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, columnToWriteOut).Resize(1, UBound(headers)) = headers
.Cells(2, columnToWriteOut).Resize(UBound(data, 1), UBound(data, 2)) = data
End With
columnToWriteOut = columnToWriteOut + UBound(headers) + 2
Next
End With
End Sub
或者您可以循环等待每个 Ajax 调用完成:
Option Explicit
Public Sub GetInfo()
Dim d As WebDriver, ws As Worksheet, clipboard As Object, writeOutColumn As Long
writeOutColumn = 1
Const URL = "http://www.kpia.or.kr/index.php/year_sugub"
Set d = New ChromeDriver
Set ws = ThisWorkbook.Worksheets("Sheet3")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With d
.Start "Chrome"
.get URL
Dim links As Object, i As Long
Set links = .FindElementsByCss("[href*=action_tab]")
For i = 1 To links.Count
If i > 1 Then
links(i).Click
Do
Loop While Not .ExecuteScript("return jQuery.active == 0")
End If
Dim table As Object
Set table = .FindElementByTag("table")
clipboard.SetText table.Attribute("outerHTML")
clipboard.PutInClipboard
ws.Cells(1, writeOutColumn).PasteSpecial
writeOutColumn = writeOutColumn + table.FindElementByTag("tr").FindElementsByTag("td").Count + 2
Set table = Nothing
Next
.Quit
End With
End Sub