在 VBA 中优化 HTML 解析

Optimizing HTML Parsing in VBA

我有一些 VBA 代码可以向 https://nt3-s.zacks.com/fundamreports/Default.aspx 发出请求并将所有数据提取到工作表中。我的问题是我的解析技术需要很长时间。有没有人对我如何改进下面的代码有什么建议?也许我应该使用 QuerySelectorAll() 或其他一些方法...

Sub Financials(ticker, SheetName As String)
Dim XMLPage As New MSXML2.XMLHTTP60
Dim RP1, RP2, QP1, QP2, QP3, QP4, QP5, QP6 As String
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim HTMLRow, HTMLCell As MSHTML.IHTMLElement
Dim i As Integer
Dim r, c, offset, shift As Integer

'Application.ScreenUpdating = False

shift = 50

' STANDARDIZED FINANCIAL STATEMENTS

' Generate income statements
'============================

XMLPage.Open "Post", "https://nt3-s.zacks.com/fundamreports/Default.aspx", False

' Header
XMLPage.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"

' Body Parameters
' Required parameters so every table value can be editted
RP1 = "__VIEWSTATE=%2FwEPDwUJNDg5NjgzODkyD2QWAgIDD2QWAgIBD2QWBAIPDxAPFgIeB0VuYWJsZWRnZGRkZAITDxAPFgIfAGdkZGRkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQhidG5FeGNlbHC%2FJVg5ST2AoHtph1XAsQL0bObX"

' String is too long to be typed in one line...
a1 = "%2FwEWdALpx6COCwKm07CqBwKSh%2B%2FQCgL96MW%2BBgLN6MW%2BBgLav7PDDQLCv%2FPADQLNv%2FPADQLcv%2FPADQLfv%2FPADQLev%2FPADQLZv%2FPADQLYv%2FPADQLbv%2FPADQLav%2FPADQLFv%2FPADQLduMOkAwLq0v%2FjCQLq0uO8AQLq0teZCgLq0rvyAwLq0q%2FPDALq0pOoBALq0oeFDQLq0uvhBgLq0p%2BJAwLq0oPiDALX%2B53NDwLX%2B4GmBwLX%2B%2FUCAtf72d8JAtf7zbgBAtf7sZUKAtf7pe4DAtf7icsMAtf7vfIJAtf7oc8CApOYyLkHApOYvBICk5ig7wkCk5iUyAICk5j4pAoCk5jsgQMCk5jQ2gwCk5jEtwQCk5jo3gICk5jcuwoC%2BKHuog0C%2BKHS%2FwYC%2BKHG2A8C%2BKGqtQcC%2BKGeDgL4oYLrCQL4ofbHAgL4odqgCgL4oY7ICAL4ofIkAuW2jIwDAuW28OgMAuW25MUFAsTR38AHAvO744cNAvO7%2F9gFAvO7y%2F0OAvO7p5YHAvO7s6sIAvO7j0wC87ub4QkC87v3hQIC87uD7QcC87ufhggCzpKBqQsCzpKdwgMCzpLp5gQCzpLFuw0CzpLR3AUCzpKt8Q4CzpK5igcCzpKVrwgCzpKhlg0CzpK9qwYCivHU3Q"
a2 = "MCivGg9gQCivG8iw0CivGIrAYCivHkwA4CivHw5QcCivHMvggCivHYUwKK8fS6BgKK8cDfDgLhyPLGCQLhyM6bAgLhyNq8CwLhyLbRAwLhyILqBALhyJ6PDQLhyOqjBgLhyMbEDgLhyJKsDALhyO7ABAL835DoBwL83%2ByMCAL83%2FihAQLnzLDSBAL4zLDSBAL5zLDSBAL6zLDSBAL8zLDSBAL9zLDSBAK3wpPeDgKFwpPeDgKuh8zbAwKhh8zbAwLR2eHwDAKFt7SHCcQGh8eHqGGqe%2F6U9Wz%2FpSk9FF8d"

RP2 = "&__EVENTVALIDATION=" & a1 & a2

' Query Parameters
QP1 = "&tbTicker=" & ticker
QP2 = "&ddBasis=Q"
QP3 = "&ddPeriod=0"
QP4 = "&ddFrom=" & CStr(Year(DateAdd("yyyy", -10, Date)))
QP5 = "&ddTo=" & CStr(Year(Date))
QP6 = "&ddFormType=0"

XMLPage.send RP1 & RP2 & QP1 & QP2 & QP3 & QP4 & QP5 & QP6

HTMLDoc.body.innerHTML = XMLPage.responseText

r = 1
c = 1
offset = 0 * shift

Sheets(SheetName).Select

Cells.Select
Selection.ClearContents

'Extract data from table
For Each HTMLRow In HTMLDoc.getElementsByTagName("tr")
    For Each HTMLCell In HTMLRow.getElementsByTagName("th")
        Cells(r, c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
    For Each HTMLCell In HTMLRow.getElementsByTagName("td")
        Cells(r, c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
c = 1
r = r + 1
Next HTMLRow

' Generate balance statements
'======================================
'Request
XMLPage.Open "Post", "https://nt3-s.zacks.com/fundamreports/Default.aspx", False

' Header
XMLPage.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"

QP6 = "&ddFormType=1"

XMLPage.send RP1 & RP2 & QP1 & QP2 & QP3 & QP4 & QP5 & QP6

HTMLDoc.body.innerHTML = XMLPage.responseText

r = 1
c = 1
offset = 1 * shift

'Extract data from table
For Each HTMLRow In HTMLDoc.getElementsByTagName("tr")
    For Each HTMLCell In HTMLRow.getElementsByTagName("th")
        Cells(r, c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
    For Each HTMLCell In HTMLRow.getElementsByTagName("td")
        Cells(r, c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
c = 1
r = r + 1
Next HTMLRow

编辑:我更改了代码以包含有关我的程序的更多详细信息。该子程序将同时收集多家公司的损益表、资产负债表、现金流量和其他指标。我包括了几个我如何使用 POST 方法解析数据的实例。

通过每次迭代计算,将 Application.Calculation = xlManual 添加到宏的开头,将 Application.Calculation = xlAutomatic 添加到宏的末尾,这将防止在每次通过循环后更新计算。