Vba 在网络抓取时删除 table

Vba getting rid of table while web scraping

任何人都可以帮助更进一步吗?

我在这里所做的是,我从代码块内部给出的网站中获取了 getElementById、tagName 表名,尽管只有 class 个 div“数据”。然后,我会将所有数据(仅货币汇率和日期)放入工作表的 excel 单元格中。但它也给了我日历日,我想摆脱以 Debug.Print 模式显示的日历日。但无法找到正确的日历标签名称以将其从代码中排除。现在,我只需要摆脱日历日的帮助;代码如下

Sub gettingTablesfromCBR()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement

IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=R01100&UniDbQuery.From=07.08.2021&UniDbQuery.To=05.11.2021"

    Do While IE.ReadyState <> READYSTATE_COMPLETE
    Loop
   
Set HTMLDOC = IE.Document
Set HTMLDIV = HTMLDOC.getElementById("data")
Set HTMLTABLES = HTMLDOC.getElementsByTagName("table")
Dim RowText As String
For Each HTMLTABLE In HTMLTABLES
    
    For Each TableSection In HTMLTABLE.Children

    
            For Each TableRow In TableSection.Children
                RowText = ""
                For Each TableCell In TableRow.Children
                    RowText = RowText & vbTab & TableCell.innerText
                Next TableCell
                Debug.Print , , RowText
            Next TableRow
    
    Next TableSection
    
Next HTMLTABLE

End Sub

更新: 您可以在最里面的 For 循环中添加一个计数器,在该循环开始之前将其重置为 1。然后您可以测试此计数器以查看它是否是该行的第一个单元格并跳过它。

这个示例对您的代码进行了一些小的调整以仅针对 table(具有 dataclassname):

Sub gettingTablesfromCBR()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
Dim Unit As String
Dim Rate As String
Dim TableCellCount As Integer

IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=R01100&UniDbQuery.From=07.08.2021&UniDbQuery.To=05.11.2021"

    Do While IE.ReadyState <> READYSTATE_COMPLETE
    Loop
   
Set HTMLDOC = IE.Document
Set HTMLTABLES = HTMLDOC.getElementsByClassName("data")
For Each HTMLTABLE In HTMLTABLES 'Table
    For Each TableSection In HTMLTABLE.Children 'Body
            For Each TableRow In TableSection.Children
                RowText = ""
                TableCellCount = 1
                For Each TableCell In TableRow.Children
                    If TableCellCount = 2 Then Unit = TableCell.innerText
                    If TableCellCount = 3 Then Rate = TableCell.innerText
                    TableCellCount = TableCellCount + 1
                Next TableCell
                Debug.Print Unit, Rate
            Next TableRow
    Next TableSection
Next


End Sub

在@JNevill 的帮助下,我解决并改进了代码。现在,如果任何有兴趣的人都可以轻松地将数字作为日期,那么费率的结果将以 table 形式放入 xl sheet.

Sub gettingTablesfromCBR2()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
Dim Unit As String
Dim Rate, DateR As String
Dim TableCellCount, RowNum As Integer
Dim inputdate_from, inputdate_to, inputcurr_val As Variant

inputdate_from = ThisWorkbook.Worksheets("PrimoPagina").Range("A3").Value
inputdate_to = ThisWorkbook.Worksheets("PrimoPagina").Range("E3").Value
inputcurr_val = ThisWorkbook.Worksheets("PrimoPagina").Range("B1").Value
If inputcurr_val = "BLG" Then
inputcurr_val = "R01100"
Else
inputcurr_val = "R01239" 'EUR
End If

IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=" & inputcurr_val & "&UniDbQuery.From=" & inputdate_from & "&UniDbQuery.To=" & inputdate_to


    Do While IE.ReadyState <> READYSTATE_COMPLETE
    Loop
   
Set HTMLDOC = IE.Document
Set HTMLTABLES = HTMLDOC.getElementsByClassName("data")
For Each HTMLTABLE In HTMLTABLES 'Table
    For Each TableSection In HTMLTABLE.Children 'Body
            For Each TableRow In TableSection.Children
                RowText = ""
                TableCellCount = 1
                For Each TableCell In TableRow.Children
                    If TableCellCount = 1 Then DateR = TableCell.innerText
                    If TableCellCount = 2 Then Unit = TableCell.innerText
                    If TableCellCount = 3 Then Rate = TableCell.innerText
                    TableCellCount = TableCellCount + 1
                Next TableCell
                Debug.Print Unit, Rate
                
                RowNum = RowNum + 1
                ThisWorkbook.Worksheets("Results").Cells(RowNum, 3).Value = Unit
                ThisWorkbook.Worksheets("Results").Cells(RowNum, 2).Value = Rate
               ThisWorkbook.Worksheets("Results").Cells(RowNum, 1).Value = DateR
                
            Next TableRow
    Next TableSection
Next


End Sub