如何用"getElementByClassName()"解析VBA中的HTML个元素?
How to use "getElementByClassName()" to parsing the HTML elements in VBA?
我将从 HTML 中获取元素的值。
HTML 代码如下。
可以显示在屏幕上
Code screenshot
Dim xmlhttp As Object
Dim url As String
Dim toTranslate As String
Dim htmldoc As HTMLDocument
toTranslate = TextBox1.Value
url = "http://dict.youdao.com/search?q=" & toTranslate & "&keyfrom=dict.index"
Set xmlhttp = CreateObject("MSXML2.XMLHTTP") '创建XML对象
xmlhttp.Open "GET", url, False '用GET 发送请求
xmlhttp.send
'等待响应
Do While xmlhttp.readyState <> 4
DoEvents
Loop
Dim explore As New InternetExplorer
'Set htmldoc =explore.document
MsgBox xmlhttp.responseText
但是我想获取class为“trans-container”(每个中文单词)的元素中标签“li”的每个值。
The content I want to get
我只知道“getElementsByClassName()”这个方法,但是不知道怎么用。感谢您的帮助!
您需要根据响应创建一个 HTMLDocument 对象并将其用于解析。正如代码中所注释的,使用方法getElementsByClassName
需要提前绑定。
尝试如下操作:
Dim url As String
Dim toTranslate As String
toTranslate = TextBox1.Value
' Note: use https:// rather than http://
url = "https://dict.youdao.com/search?q=" & toTranslate & "&keyfrom=dict.index"
' Creating and sending the request:
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.XMLHTTP") '创建XML对象
xmlhttp.Open "GET", url, False '用GET 发送请求
xmlhttp.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"
xmlhttp.send ""
' Getting the response
' This needs to be early bound so the method getElementsByClassName is available!
' The required reference is "Microsoft HTML Object Library"
Dim objHTML as HTMLDocument
Set objHTML = New HTMLDocument
objHTML.body.innerHTML = xmlhttp.responseText
' Parsing the response
Dim objTransContainers as Object, objTransContainer as Object
Dim objLis as Object, objLi as Object
Dim retText as String
Set objTransContainers = objHTML.getElementsByClassName("trans-container")
For Each objTransContainer in objTransContainers
Set objLis = objTransContainer.getElementsByTagName("li")
For Each objLi in objLis
retText = retText & objLi.innerText & vbNewLine
Next objLi
Next objTransContainer
MsgBox retText
或者,您可以仅使用标签并在循环中检查 class 名称以进行解析。优点是,此方法也适用于后期绑定的 HTMLDocument:
' Getting the response:
Dim objHTML as Object
Set objHTML = CreateObject("htmlFile")
' Note: this objHTML.write will not work with early binding!
' In that case you have to use the .body.innerHTML
' like in the code sample above.
With objHTML
.Open
.write xmlhttp.responseText
.Close
End With
' Parsing the response
Dim objDivs as Object, objDiv as Object
Dim objLis as Object, objLi as Object
Dim retText as String
Set objDivs = objHTML.getElementsByTagName("div")
For Each objDiv in objDivs
If objDiv.className = "trans-container" Then
Set objLis = objDiv.getElementsByTagName("li")
For Each objLi in objLis
retText = retText & objLi.innerText & vbNewLine
Next objLi
End If
Next objDiv
MsgBox retText
数组替代使用FilterXML()
基于接收到的响应字符串,我演示了一种通过 FilterXML()
获取列表项的方法,自 vers. 2013+.
Function getListItems(ByVal sResponse As String, Optional IsZeroBased As Boolean = False)
'Purpose: assign list items in div trans-container class to 1-dim array
'XPath search expression
Dim xp As String
xp = "//div[@class='trans-container']/ul/li"
With WorksheetFunction
'assign list items to 1-based 2-dim array
Dim listItems: listItems = .FilterXML(sResponse, xp)
'optional Test display in VB Editor's immediate window
Debug.Print Join(.Transpose(listItems), ", ")
'return <li> items as 1-dim array (optionally 0-based)
getListItems = .Transpose(listItems)
If IsZeroBased Then ReDim Preserve getListItems(0 To UBound(getListItems) - 1)
End With
End Function
我将从 HTML 中获取元素的值。 HTML 代码如下。
可以显示在屏幕上
Code screenshot
Dim xmlhttp As Object
Dim url As String
Dim toTranslate As String
Dim htmldoc As HTMLDocument
toTranslate = TextBox1.Value
url = "http://dict.youdao.com/search?q=" & toTranslate & "&keyfrom=dict.index"
Set xmlhttp = CreateObject("MSXML2.XMLHTTP") '创建XML对象
xmlhttp.Open "GET", url, False '用GET 发送请求
xmlhttp.send
'等待响应
Do While xmlhttp.readyState <> 4
DoEvents
Loop
Dim explore As New InternetExplorer
'Set htmldoc =explore.document
MsgBox xmlhttp.responseText
但是我想获取class为“trans-container”(每个中文单词)的元素中标签“li”的每个值。
The content I want to get
我只知道“getElementsByClassName()”这个方法,但是不知道怎么用。感谢您的帮助!
您需要根据响应创建一个 HTMLDocument 对象并将其用于解析。正如代码中所注释的,使用方法getElementsByClassName
需要提前绑定。
尝试如下操作:
Dim url As String
Dim toTranslate As String
toTranslate = TextBox1.Value
' Note: use https:// rather than http://
url = "https://dict.youdao.com/search?q=" & toTranslate & "&keyfrom=dict.index"
' Creating and sending the request:
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.XMLHTTP") '创建XML对象
xmlhttp.Open "GET", url, False '用GET 发送请求
xmlhttp.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"
xmlhttp.send ""
' Getting the response
' This needs to be early bound so the method getElementsByClassName is available!
' The required reference is "Microsoft HTML Object Library"
Dim objHTML as HTMLDocument
Set objHTML = New HTMLDocument
objHTML.body.innerHTML = xmlhttp.responseText
' Parsing the response
Dim objTransContainers as Object, objTransContainer as Object
Dim objLis as Object, objLi as Object
Dim retText as String
Set objTransContainers = objHTML.getElementsByClassName("trans-container")
For Each objTransContainer in objTransContainers
Set objLis = objTransContainer.getElementsByTagName("li")
For Each objLi in objLis
retText = retText & objLi.innerText & vbNewLine
Next objLi
Next objTransContainer
MsgBox retText
或者,您可以仅使用标签并在循环中检查 class 名称以进行解析。优点是,此方法也适用于后期绑定的 HTMLDocument:
' Getting the response:
Dim objHTML as Object
Set objHTML = CreateObject("htmlFile")
' Note: this objHTML.write will not work with early binding!
' In that case you have to use the .body.innerHTML
' like in the code sample above.
With objHTML
.Open
.write xmlhttp.responseText
.Close
End With
' Parsing the response
Dim objDivs as Object, objDiv as Object
Dim objLis as Object, objLi as Object
Dim retText as String
Set objDivs = objHTML.getElementsByTagName("div")
For Each objDiv in objDivs
If objDiv.className = "trans-container" Then
Set objLis = objDiv.getElementsByTagName("li")
For Each objLi in objLis
retText = retText & objLi.innerText & vbNewLine
Next objLi
End If
Next objDiv
MsgBox retText
数组替代使用FilterXML()
基于接收到的响应字符串,我演示了一种通过 FilterXML()
获取列表项的方法,自 vers. 2013+.
Function getListItems(ByVal sResponse As String, Optional IsZeroBased As Boolean = False)
'Purpose: assign list items in div trans-container class to 1-dim array
'XPath search expression
Dim xp As String
xp = "//div[@class='trans-container']/ul/li"
With WorksheetFunction
'assign list items to 1-based 2-dim array
Dim listItems: listItems = .FilterXML(sResponse, xp)
'optional Test display in VB Editor's immediate window
Debug.Print Join(.Transpose(listItems), ", ")
'return <li> items as 1-dim array (optionally 0-based)
getListItems = .Transpose(listItems)
If IsZeroBased Then ReDim Preserve getListItems(0 To UBound(getListItems) - 1)
End With
End Function