如何使用 VBA 跳过缺少 html 标记的 Excel 行
How to skip a row in Excel with missing html tag using VBA
本网站列出了 15 个对象,每个对象的照片下方都有一个 link。第 6 个对象有 none。当使用我的代码提取和传输内容时,缺少的 html-href 不会被跳过,在 Excel 中,14 个 href 列在彼此下方(第 6 个单元格应保持为空或“无文档”),但最后一个单元格确实如此(&错误,因为 14<=>15)。不幸的是,我必须保留我的代码结构,只需要一个循环或条件来完成它。有人有什么想法吗?谢谢
我的不完整代码:
Public Sub GetData()
Dim html As New HTMLDocument
Dim elmt01 As Object, elmt02 As Object
Dim y As Long
Dim xURL As String
Set html = New MSHTML.HTMLDocument
xURL = "https://immobilienpool.de/suche/immobilien?page=1"
With CreateObject("MSXML2.XMLHTTP.6.0")
.Open "GET", xURL, False
.send
html.body.innerHTML = .responseText
End With
Set elmt01 = html.querySelectorAll("li[class*='contentBox']") '15 items
Set elmt02 = html.querySelectorAll("li a[title*='zusätzliche']") '14 hrefs
For y = 0 To elmt01.Length - 1
If InStr(elmt02, "pdf") Then 'better: If elmt02 exists in elmt01 then...
ActiveSheet.Cells(y + 1, 2) = elmt02.Item(y).href
Else
ActiveSheet.Cells(y + 1, 2) = "No document"
End If
Next
End Sub
以下脚本应该可以解决您遇到的问题。我不得不修改您的代码以跳过空白行。希望您能遵守当前版本:
Public Sub GetData()
Dim Html As HTMLDocument, HTMLDoc As HTMLDocument
Dim oPdfLink As Object, xURL As String, I As Long
Set Html = New MSHTML.HTMLDocument
Set HTMLDoc = New MSHTML.HTMLDocument
xURL = "https://immobilienpool.de/suche/immobilien?page=1"
With CreateObject("MSXML2.XMLHTTP.6.0")
.Open "GET", xURL, False
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("li[class*='contentBox']")
For I = 0 To .Length - 1
HTMLDoc.body.innerHTML = .item(I).outerHTML
Set oPdfLink = HTMLDoc.querySelector("a[title*='zusätzliche']")
If Not oPdfLink Is Nothing Then
ActiveSheet.Cells(I + 1, 2) = oPdfLink.href
Else:
ActiveSheet.Cells(I + 1, 2) = "No document"
End If
Next I
End With
End Sub
本网站列出了 15 个对象,每个对象的照片下方都有一个 link。第 6 个对象有 none。当使用我的代码提取和传输内容时,缺少的 html-href 不会被跳过,在 Excel 中,14 个 href 列在彼此下方(第 6 个单元格应保持为空或“无文档”),但最后一个单元格确实如此(&错误,因为 14<=>15)。不幸的是,我必须保留我的代码结构,只需要一个循环或条件来完成它。有人有什么想法吗?谢谢
我的不完整代码:
Public Sub GetData()
Dim html As New HTMLDocument
Dim elmt01 As Object, elmt02 As Object
Dim y As Long
Dim xURL As String
Set html = New MSHTML.HTMLDocument
xURL = "https://immobilienpool.de/suche/immobilien?page=1"
With CreateObject("MSXML2.XMLHTTP.6.0")
.Open "GET", xURL, False
.send
html.body.innerHTML = .responseText
End With
Set elmt01 = html.querySelectorAll("li[class*='contentBox']") '15 items
Set elmt02 = html.querySelectorAll("li a[title*='zusätzliche']") '14 hrefs
For y = 0 To elmt01.Length - 1
If InStr(elmt02, "pdf") Then 'better: If elmt02 exists in elmt01 then...
ActiveSheet.Cells(y + 1, 2) = elmt02.Item(y).href
Else
ActiveSheet.Cells(y + 1, 2) = "No document"
End If
Next
End Sub
以下脚本应该可以解决您遇到的问题。我不得不修改您的代码以跳过空白行。希望您能遵守当前版本:
Public Sub GetData()
Dim Html As HTMLDocument, HTMLDoc As HTMLDocument
Dim oPdfLink As Object, xURL As String, I As Long
Set Html = New MSHTML.HTMLDocument
Set HTMLDoc = New MSHTML.HTMLDocument
xURL = "https://immobilienpool.de/suche/immobilien?page=1"
With CreateObject("MSXML2.XMLHTTP.6.0")
.Open "GET", xURL, False
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("li[class*='contentBox']")
For I = 0 To .Length - 1
HTMLDoc.body.innerHTML = .item(I).outerHTML
Set oPdfLink = HTMLDoc.querySelector("a[title*='zusätzliche']")
If Not oPdfLink Is Nothing Then
ActiveSheet.Cells(I + 1, 2) = oPdfLink.href
Else:
ActiveSheet.Cells(I + 1, 2) = "No document"
End If
Next I
End With
End Sub