VBA 无法使用 .getElementsByTag() 或 .getElementByID() 从 HTML 获取数据
VBA cannot get data from HTML with .getElementsByTag() nor .getElementByID()
我当前的项目包括从 HTML 源代码中检索数据。
具体来说,我正在查看此网站上的崩溃案例:
https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=112007272
我想通过查找特定 tags/IDs 的 .innertext
从 HTML 收集所有相关数据。
到目前为止我的代码:
Sub ExtractData()
mystart:
'First I create two Internet Explorer object
Set objIE = CreateObject("InternetExplorer.Application") 'this browser contains the list of cases
objIE.Top = 0
objIE.Left = 0
objIE.Width = 1600
objIE.Height = 900
objIE.Visible = True 'We can see IE
Set objIEdata = CreateObject("InternetExplorer.Application") 'this browser opens the specific case
objIEdata.Top = 0
objIEdata.Left = 0
objIEdata.Width = 1600
objIEdata.Height = 900
objIEdata.Visible = True 'We can see IE
On Error Resume Next
objIE.navigate ("https://crashviewer.nhtsa.dot.gov/LegacyCDS/Index") 'url of website
Do
DoEvents
If Err.Number <> 0 Then
objIE.Quit
Set objIE = Nothing
GoTo mystart:
End If
Loop Until objIE.readystate = 4
'we define an object variable Alllinks and loop through all the links to search for
Set aAlllinks = objIE.document.getElementsByTagName("button") 'looks for Search Button
For Each Hyperlink In aAlllinks
If Hyperlink.innertext = " Search" Then
Hyperlink.Click
Exit For
Else
MsgBox "Search Button was not found. Please improve code!"
End If
Next
Application.Wait (Now + TimeValue("0:00:02"))
Set bAlllinks = objIE.document.getElementsByTagName("a") 'all Hyperlinks on webpage start with Tag "a"
For Each Hyperlink In bAlllinks
If UBound(Split(Hyperlink.innertext, "-")) = 2 And Len(Hyperlink.innertext) = 11 Then 'case specific to find the Hyperlinks which contain cases
Debug.Print Hyperlink.innertext
'2nd IE is used for each case
restart:
objIEdata.navigate (Hyperlink.href) 'url of each case
Do
DoEvents
If Err.Number <> 0 Then
objIEdata.Quit
Set objIE = Nothing
GoTo restart:
End If
Loop Until objIEdata.readystate = 4
Set register = objIEdata.document.getElementByTagName("tbody") 'objIEdata.document.getElementByID("main").getElementByID("mainSection") '.getElementByID("bodyMain").getElementsByTagName("tbody")
For Each untermenue In register
Debug.Print untermenue.innerHTML
Next
Application.Wait (Now + TimeValue("0:00:02"))
End If
Next
objIE.Quit
objIEdata.Quit
End Sub
请注意,IE 的可见性只是出于调试原因。
让我困惑的部分是
Set register = objIEdata.document.getElementByTagName("tbody")
.
如果我寻找 .TagName("tbody")
,变量寄存器返回空,如果我寻找 .ID("bodyMain")
,也会发生同样的情况。不幸的是,我不熟悉 HTML 以及 VBA 如何与 HTML 文档交互。我的印象是我可以通过它们的 ID 来处理所有元素,如果它们恰好有一个,但这似乎不起作用。
我是否需要自己完成 HTML "branches" 或者代码是否应该能够找到每个 ID,无论它是在 "child" 中找到的?
非常感谢
你问的是一个很大的要求,所以我将提供一些指导和起始代码。我的代码应该写出所有的表格,但你会想尝试一下以获得你想要的格式。围绕有效选择元素肯定有足够的逻辑,这应该有所帮助。 * 由于时间限制,我还没有测试使用 class 循环遍历所有检索到的 ID,但测试了个别情况和所有 ID 的检索。
获取初始案例链接和 ID:
我可能会使用一个函数 returning 一个包含链接和 ID 的数组。如果您提取 ID,它们可以传递给我在下面显示的 XMLHTTP 请求。
URL 是 https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search
Public Function GetLinksAndIds(ByVal URL) As Variant
Dim ie As InternetExplorer, i As Long
Set ie = New InternetExplorer
With ie
.Visible = True
.navigate2 URL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Dim caseLinks As Object, id As String, newURL As String
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
.Quit
End With
GetLinksAndIds = linksAndIds
End Function
示例 return 值:
对于每种情况 - 使用 XMLHTTP:
我很想避免使用 IE 并使用 XMLHTTP
request(url 编码的查询字符串 return 使用打印选项获得更具可读性的页面版本)。虽然我已经使用 css 选择器进行了解析,但您可以将响应读入 MSXML2.DOMDocument60
并使用 XPath
进行查询。您可以将 caseid 连接成 URL。
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=112007272&year=&fullimage=false", False '<==concatenate caseid into URL
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = .responseText
End With
Set html = New HTMLDocument
html.body.innerHTML = sResponse
Dim tables As Object, i As Long
Set tables = html.querySelectorAll("table")
For i = 0 To tables.Length - 1
clipboard.SetText tables.item(i).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm '<< Function below modified from here
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
使用 class 来保存 xmlhttp 对象可能会是什么样子(未测试):
Class clsHTTP:
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal URL As String) As String
Dim sResponse As String
With http
.Open "GET", URL, False
.send
sResponse = .responseText
End With
End Function
标准模块1:
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Dim initialLinksURL As String, http As clsHTTP, i As Long, j As Long, newURL As String
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set html = New HTMLDocument
initialLinksURL = "https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search"
Dim linksAndIds()
linksAndIds = GetLinksAndIds(initialLinksURL)
For i = LBound(linksAndIds, 2) To UBound(linksAndIds, 2)
newURL = "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=" & linksAndIds(i, 2) & "&year=&fullimage=false"
html.body.innerHTML = http.GetString(newURL)
Dim tables As Object
Set tables = html.querySelectorAll("table")
For j = 0 To tables.Length - 1
clipboard.SetText tables.item(j).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
Next
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
Public Function GetLinksAndIds(ByVal URL) As Variant
Dim ie As InternetExplorer, i As Long
Set ie = New InternetExplorer
With ie
.Visible = True
.navigate URL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Dim caseLinks As Object, id As String, newURL As String
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
.Quit
End With
GetLinksAndIds = linksAndIds
End Function
所有 Internet Explorer 选项:
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Dim initialLinksURL As String, i As Long, j As Long, newURL As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set html = New HTMLDocument
initialLinksURL = "https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search"
Dim ie As InternetExplorer, caseLinks As Object
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 initialLinksURL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
For i = LBound(linksAndIds, 2) To 2 ' UBound(linksAndIds, 2)
newURL = "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=" & linksAndIds(i, 2) & "&year=&fullimage=false"
.Navigate2 newURL
While .Busy Or .readyState < 4: DoEvents: Wend
Dim tables As Object
Set tables = .document.querySelectorAll("table")
For j = 0 To tables.Length - 1
clipboard.SetText tables.item(j).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
Next
.Quit
End With
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
我当前的项目包括从 HTML 源代码中检索数据。 具体来说,我正在查看此网站上的崩溃案例:
https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=112007272
我想通过查找特定 tags/IDs 的 .innertext
从 HTML 收集所有相关数据。
到目前为止我的代码:
Sub ExtractData()
mystart:
'First I create two Internet Explorer object
Set objIE = CreateObject("InternetExplorer.Application") 'this browser contains the list of cases
objIE.Top = 0
objIE.Left = 0
objIE.Width = 1600
objIE.Height = 900
objIE.Visible = True 'We can see IE
Set objIEdata = CreateObject("InternetExplorer.Application") 'this browser opens the specific case
objIEdata.Top = 0
objIEdata.Left = 0
objIEdata.Width = 1600
objIEdata.Height = 900
objIEdata.Visible = True 'We can see IE
On Error Resume Next
objIE.navigate ("https://crashviewer.nhtsa.dot.gov/LegacyCDS/Index") 'url of website
Do
DoEvents
If Err.Number <> 0 Then
objIE.Quit
Set objIE = Nothing
GoTo mystart:
End If
Loop Until objIE.readystate = 4
'we define an object variable Alllinks and loop through all the links to search for
Set aAlllinks = objIE.document.getElementsByTagName("button") 'looks for Search Button
For Each Hyperlink In aAlllinks
If Hyperlink.innertext = " Search" Then
Hyperlink.Click
Exit For
Else
MsgBox "Search Button was not found. Please improve code!"
End If
Next
Application.Wait (Now + TimeValue("0:00:02"))
Set bAlllinks = objIE.document.getElementsByTagName("a") 'all Hyperlinks on webpage start with Tag "a"
For Each Hyperlink In bAlllinks
If UBound(Split(Hyperlink.innertext, "-")) = 2 And Len(Hyperlink.innertext) = 11 Then 'case specific to find the Hyperlinks which contain cases
Debug.Print Hyperlink.innertext
'2nd IE is used for each case
restart:
objIEdata.navigate (Hyperlink.href) 'url of each case
Do
DoEvents
If Err.Number <> 0 Then
objIEdata.Quit
Set objIE = Nothing
GoTo restart:
End If
Loop Until objIEdata.readystate = 4
Set register = objIEdata.document.getElementByTagName("tbody") 'objIEdata.document.getElementByID("main").getElementByID("mainSection") '.getElementByID("bodyMain").getElementsByTagName("tbody")
For Each untermenue In register
Debug.Print untermenue.innerHTML
Next
Application.Wait (Now + TimeValue("0:00:02"))
End If
Next
objIE.Quit
objIEdata.Quit
End Sub
请注意,IE 的可见性只是出于调试原因。
让我困惑的部分是
Set register = objIEdata.document.getElementByTagName("tbody")
.
如果我寻找 .TagName("tbody")
,变量寄存器返回空,如果我寻找 .ID("bodyMain")
,也会发生同样的情况。不幸的是,我不熟悉 HTML 以及 VBA 如何与 HTML 文档交互。我的印象是我可以通过它们的 ID 来处理所有元素,如果它们恰好有一个,但这似乎不起作用。
我是否需要自己完成 HTML "branches" 或者代码是否应该能够找到每个 ID,无论它是在 "child" 中找到的?
非常感谢
你问的是一个很大的要求,所以我将提供一些指导和起始代码。我的代码应该写出所有的表格,但你会想尝试一下以获得你想要的格式。围绕有效选择元素肯定有足够的逻辑,这应该有所帮助。 * 由于时间限制,我还没有测试使用 class 循环遍历所有检索到的 ID,但测试了个别情况和所有 ID 的检索。
获取初始案例链接和 ID:
我可能会使用一个函数 returning 一个包含链接和 ID 的数组。如果您提取 ID,它们可以传递给我在下面显示的 XMLHTTP 请求。
URL 是 https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search
Public Function GetLinksAndIds(ByVal URL) As Variant
Dim ie As InternetExplorer, i As Long
Set ie = New InternetExplorer
With ie
.Visible = True
.navigate2 URL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Dim caseLinks As Object, id As String, newURL As String
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
.Quit
End With
GetLinksAndIds = linksAndIds
End Function
示例 return 值:
对于每种情况 - 使用 XMLHTTP:
我很想避免使用 IE 并使用 XMLHTTP
request(url 编码的查询字符串 return 使用打印选项获得更具可读性的页面版本)。虽然我已经使用 css 选择器进行了解析,但您可以将响应读入 MSXML2.DOMDocument60
并使用 XPath
进行查询。您可以将 caseid 连接成 URL。
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=112007272&year=&fullimage=false", False '<==concatenate caseid into URL
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = .responseText
End With
Set html = New HTMLDocument
html.body.innerHTML = sResponse
Dim tables As Object, i As Long
Set tables = html.querySelectorAll("table")
For i = 0 To tables.Length - 1
clipboard.SetText tables.item(i).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm '<< Function below modified from here
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
使用 class 来保存 xmlhttp 对象可能会是什么样子(未测试):
Class clsHTTP:
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal URL As String) As String
Dim sResponse As String
With http
.Open "GET", URL, False
.send
sResponse = .responseText
End With
End Function
标准模块1:
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Dim initialLinksURL As String, http As clsHTTP, i As Long, j As Long, newURL As String
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set html = New HTMLDocument
initialLinksURL = "https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search"
Dim linksAndIds()
linksAndIds = GetLinksAndIds(initialLinksURL)
For i = LBound(linksAndIds, 2) To UBound(linksAndIds, 2)
newURL = "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=" & linksAndIds(i, 2) & "&year=&fullimage=false"
html.body.innerHTML = http.GetString(newURL)
Dim tables As Object
Set tables = html.querySelectorAll("table")
For j = 0 To tables.Length - 1
clipboard.SetText tables.item(j).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
Next
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
Public Function GetLinksAndIds(ByVal URL) As Variant
Dim ie As InternetExplorer, i As Long
Set ie = New InternetExplorer
With ie
.Visible = True
.navigate URL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Dim caseLinks As Object, id As String, newURL As String
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
.Quit
End With
GetLinksAndIds = linksAndIds
End Function
所有 Internet Explorer 选项:
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Dim initialLinksURL As String, i As Long, j As Long, newURL As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set html = New HTMLDocument
initialLinksURL = "https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search"
Dim ie As InternetExplorer, caseLinks As Object
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 initialLinksURL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
For i = LBound(linksAndIds, 2) To 2 ' UBound(linksAndIds, 2)
newURL = "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=" & linksAndIds(i, 2) & "&year=&fullimage=false"
.Navigate2 newURL
While .Busy Or .readyState < 4: DoEvents: Wend
Dim tables As Object
Set tables = .document.querySelectorAll("table")
For j = 0 To tables.Length - 1
clipboard.SetText tables.item(j).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
Next
.Quit
End With
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function