如何使用 VBA 和选择器获取 HTML 中标签的第 n 项以导出到 Excel
How to get the nth item of a tag in HTML using VBA and selectors for export to Excel
如何使用选择器检查“alt”的内容是“Meeting”还是“Canceled”?
<body>
<div class="mainBox">
<li class="content">
<img class="title" src="https://url_01/images/img01.gif" alt="Meeting">
</li>
<li class="content">
<img class="title" src="https://url_02/images/img02.gif" alt="Meeting">
</li>
<li class="content">
<img class="title" src="https://url_03/images/img03.gif" alt="Canceled">
</li>
<li class="content">
<img class="title" src="https://url_04/images/img04.gif" alt="Canceled">
</li>
</div>
</body>
重要提示:选择器必须引用 class="title",因为这 4 段代码中的其他标签在整个 HTML 代码中也多次出现,并且内容常常各不相同。谢谢。
我的代码:
' GetData (as posted in the question): downloads a page, loads it into an
' HTMLDocument, and tries to write "Meeting" or "Canceled" into column B of the
' active sheet for every <img class="title"> element.
' The ???? markers are the asker's placeholders for the parts they could not
' write, so this listing does not compile as shown.
Public Sub GetData()
Dim ohtml As New HTMLDocument
Dim elmt As Object
Dim x As Long
Set ohtml = New MSHTML.HTMLDocument
' Late-bound XMLHTTP request; the third argument (False) makes the call synchronous.
With CreateObject("MSXML2.XMLHTTP.6.0")
.Open "GET", "https:...", False
.send
' Parse the response by assigning it as the document body's HTML.
ohtml.body.innerHTML = .responseText
End With
' NOTE(review): elmt has not been assigned yet when elmt.Length is read below;
' it is only Set inside the loop body.
For x = 0 To elmt.Length - 1
' Re-runs the selector on every iteration and keeps only the x-th match.
Set elmt = ohtml.querySelectorAll("img[class='title']").Item(x) '<<== ??????
' Placeholder: the asker wants to test the image's alt text here.
If instr(????????, "Meeting") then '<<===== ???????
' Output starts at row 2 (x is zero-based), column 2 (B).
ActiveSheet.Cells(x + 2, 2) = "Meeting"
Else
ActiveSheet.Cells(x + 2, 2) = "Canceled"
End If
Next
End Sub
在循环之前设置 elmt:
' GetData: downloads the page, collects every <img class="title"> element once,
' and writes each image's alt text to column B of the active sheet, starting at
' row 2.
' The early-bound HTMLDocument type needs a project reference to the
' Microsoft HTML Object Library.
Public Sub GetData()
    ' i is a Long (not Integer) so the row counter cannot overflow when the page
    ' yields more than 32,767 matches, and to match the Long counters used in
    ' the other listings.
    Dim ohtml As New HTMLDocument, elmt As Object, i As Long

    ' Late-bound XMLHTTP request; False makes the call synchronous, so
    ' .responseText is complete once .send returns.
    With CreateObject("MSXML2.XMLHTTP.6.0")
        .Open "GET", "https:...", False
        .send
        ' Parse the response by assigning it as the document body's HTML.
        ohtml.body.innerHTML = .responseText
    End With

    ' Run the selector once, before the loop, and keep the whole node list.
    Set elmt = ohtml.querySelectorAll("img[class='title']")

    ' The node list is zero-based; i runs 1..Length so that i + 1 is the target row.
    For i = 1 To elmt.Length
        'Debug.Print i, elmt(i - 1).alt, elmt(i - 1).src
        ActiveSheet.Cells(i + 1, 2) = elmt(i - 1).alt
    Next
End Sub
update1 : 将 s, s2 替换为 .responseText
扩展您的 CSS 选择器,加入这些额外的属性选择器([attribute=value]),这样 elmt 就只包含符合条件的节点;然后在循环中提取每个节点的 alt 属性值。另外,是的,应当在循环之前设置 elmt。
Option Explicit
' GetData: downloads the page, selects only the <img class="title"> elements
' whose alt attribute is "Canceled" or "Meeting", and writes each alt value to
' column B of the active sheet, starting at row 2.
' The early-bound MSHTML types need a project reference to the
' Microsoft HTML Object Library.
Public Sub GetData()
    ' Plain typed declaration: the document is created explicitly below, so an
    ' auto-instancing "As New" declaration would be redundant.
    Dim ohtml As MSHTML.HTMLDocument
    Dim elmt As Object 'Dim elmt As MSHTML.IHTMLDOMChildrenCollection '' for later mshtml.dll updates May 2021 onwards
    Dim x As Long

    Set ohtml = New MSHTML.HTMLDocument

    ' Late-bound XMLHTTP request; False makes the call synchronous, so
    ' .responseText is complete once .send returns.
    With CreateObject("MSXML2.XMLHTTP.6.0")
        .Open "GET", "https:...", False
        .send
        ' Parse the response by assigning it as the document body's HTML.
        ohtml.body.innerHTML = .responseText
    End With

    ' The selector list (comma = OR) restricts the matches to the two alt values
    ' of interest, so the loop only has to copy the attribute.
    Set elmt = ohtml.querySelectorAll("img[class=title][alt=Canceled], img[class=title][alt=Meeting]")

    ' The node list is zero-based; x + 2 leaves row 1 free.
    For x = 0 To elmt.Length - 1
        ActiveSheet.Cells(x + 2, 2) = elmt.Item(x).alt
    Next
End Sub
如何使用选择器检查“alt”的内容是“Meeting”还是“Canceled”?
<body>
<div class="mainBox">
<li class="content">
<img class="title" src="https://url_01/images/img01.gif" alt="Meeting">
</li>
<li class="content">
<img class="title" src="https://url_02/images/img02.gif" alt="Meeting">
</li>
<li class="content">
<img class="title" src="https://url_03/images/img03.gif" alt="Canceled">
</li>
<li class="content">
<img class="title" src="https://url_04/images/img04.gif" alt="Canceled">
</li>
</div>
</body>
重要提示:选择器必须引用 class="title",因为这 4 段代码中的其他标签在整个 HTML 代码中也多次出现,并且内容常常各不相同。谢谢。
我的代码:
' GetData (as posted in the question): downloads a page, loads it into an
' HTMLDocument, and tries to write "Meeting" or "Canceled" into column B of the
' active sheet for every <img class="title"> element.
' The ???? markers are the asker's placeholders for the parts they could not
' write, so this listing does not compile as shown.
Public Sub GetData()
Dim ohtml As New HTMLDocument
Dim elmt As Object
Dim x As Long
Set ohtml = New MSHTML.HTMLDocument
' Late-bound XMLHTTP request; the third argument (False) makes the call synchronous.
With CreateObject("MSXML2.XMLHTTP.6.0")
.Open "GET", "https:...", False
.send
' Parse the response by assigning it as the document body's HTML.
ohtml.body.innerHTML = .responseText
End With
' NOTE(review): elmt has not been assigned yet when elmt.Length is read below;
' it is only Set inside the loop body.
For x = 0 To elmt.Length - 1
' Re-runs the selector on every iteration and keeps only the x-th match.
Set elmt = ohtml.querySelectorAll("img[class='title']").Item(x) '<<== ??????
' Placeholder: the asker wants to test the image's alt text here.
If instr(????????, "Meeting") then '<<===== ???????
' Output starts at row 2 (x is zero-based), column 2 (B).
ActiveSheet.Cells(x + 2, 2) = "Meeting"
Else
ActiveSheet.Cells(x + 2, 2) = "Canceled"
End If
Next
End Sub
在循环之前设置 elmt:
' GetData: downloads the page, collects every <img class="title"> element once,
' and writes each image's alt text to column B of the active sheet, starting at
' row 2.
' The early-bound HTMLDocument type needs a project reference to the
' Microsoft HTML Object Library.
Public Sub GetData()
    ' i is a Long (not Integer) so the row counter cannot overflow when the page
    ' yields more than 32,767 matches, and to match the Long counters used in
    ' the other listings.
    Dim ohtml As New HTMLDocument, elmt As Object, i As Long

    ' Late-bound XMLHTTP request; False makes the call synchronous, so
    ' .responseText is complete once .send returns.
    With CreateObject("MSXML2.XMLHTTP.6.0")
        .Open "GET", "https:...", False
        .send
        ' Parse the response by assigning it as the document body's HTML.
        ohtml.body.innerHTML = .responseText
    End With

    ' Run the selector once, before the loop, and keep the whole node list.
    Set elmt = ohtml.querySelectorAll("img[class='title']")

    ' The node list is zero-based; i runs 1..Length so that i + 1 is the target row.
    For i = 1 To elmt.Length
        'Debug.Print i, elmt(i - 1).alt, elmt(i - 1).src
        ActiveSheet.Cells(i + 1, 2) = elmt(i - 1).alt
    Next
End Sub
update1 : 将 s, s2 替换为 .responseText
扩展您的 CSS 选择器,加入这些额外的属性选择器([attribute=value]),这样 elmt 就只包含符合条件的节点;然后在循环中提取每个节点的 alt 属性值。另外,是的,应当在循环之前设置 elmt。
Option Explicit
' GetData: downloads the page, selects only the <img class="title"> elements
' whose alt attribute is "Canceled" or "Meeting", and writes each alt value to
' column B of the active sheet, starting at row 2.
' The early-bound MSHTML types need a project reference to the
' Microsoft HTML Object Library.
Public Sub GetData()
    ' Plain typed declaration: the document is created explicitly below, so an
    ' auto-instancing "As New" declaration would be redundant.
    Dim ohtml As MSHTML.HTMLDocument
    Dim elmt As Object 'Dim elmt As MSHTML.IHTMLDOMChildrenCollection '' for later mshtml.dll updates May 2021 onwards
    Dim x As Long

    Set ohtml = New MSHTML.HTMLDocument

    ' Late-bound XMLHTTP request; False makes the call synchronous, so
    ' .responseText is complete once .send returns.
    With CreateObject("MSXML2.XMLHTTP.6.0")
        .Open "GET", "https:...", False
        .send
        ' Parse the response by assigning it as the document body's HTML.
        ohtml.body.innerHTML = .responseText
    End With

    ' The selector list (comma = OR) restricts the matches to the two alt values
    ' of interest, so the loop only has to copy the attribute.
    Set elmt = ohtml.querySelectorAll("img[class=title][alt=Canceled], img[class=title][alt=Meeting]")

    ' The node list is zero-based; x + 2 leaves row 1 free.
    For x = 0 To elmt.Length - 1
        ActiveSheet.Cells(x + 2, 2) = elmt.Item(x).alt
    Next
End Sub