Vba 在网络抓取时删除 table
Vba getting rid of table while web scraping
任何人都可以帮助更进一步吗?
我在这里所做的是,我从代码块内部给出的网站中获取了 getElementById、tagName 表名,尽管只有 class 个 div“数据”。然后,我会将所有数据(仅货币汇率和日期)放入工作表的 excel 单元格中。但它也给了我日历日,我想摆脱以 Debug.Print 模式显示的日历日。但无法找到正确的日历标签名称以将其从代码中排除。现在,我只需要摆脱日历日的帮助;代码如下
Sub gettingTablesfromCBR()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=R01100&UniDbQuery.From=07.08.2021&UniDbQuery.To=05.11.2021"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDOC = IE.Document
Set HTMLDIV = HTMLDOC.getElementById("data")
Set HTMLTABLES = HTMLDOC.getElementsByTagName("table")
Dim RowText As String
For Each HTMLTABLE In HTMLTABLES
For Each TableSection In HTMLTABLE.Children
For Each TableRow In TableSection.Children
RowText = ""
For Each TableCell In TableRow.Children
RowText = RowText & vbTab & TableCell.innerText
Next TableCell
Debug.Print , , RowText
Next TableRow
Next TableSection
Next HTMLTABLE
End Sub
更新:
您可以在最里面的 For
循环中添加一个计数器,在该循环开始之前将其重置为 1
。然后您可以测试此计数器以查看它是否是该行的第一个单元格并跳过它。
这个示例对您的代码进行了一些小的调整以仅针对 table(具有 data
的 classname
):
Sub gettingTablesfromCBR()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
Dim Unit As String
Dim Rate As String
Dim TableCellCount As Integer
IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=R01100&UniDbQuery.From=07.08.2021&UniDbQuery.To=05.11.2021"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDOC = IE.Document
Set HTMLTABLES = HTMLDOC.getElementsByClassName("data")
For Each HTMLTABLE In HTMLTABLES 'Table
For Each TableSection In HTMLTABLE.Children 'Body
For Each TableRow In TableSection.Children
RowText = ""
TableCellCount = 1
For Each TableCell In TableRow.Children
If TableCellCount = 2 Then Unit = TableCell.innerText
If TableCellCount = 3 Then Rate = TableCell.innerText
TableCellCount = TableCellCount + 1
Next TableCell
Debug.Print Unit, Rate
Next TableRow
Next TableSection
Next
End Sub
在@JNevill 的帮助下,我解决并改进了代码。现在,如果任何有兴趣的人都可以轻松地将数字作为日期,那么费率的结果将以 table 形式放入 xl sheet.
中
Sub gettingTablesfromCBR2()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
Dim Unit As String
Dim Rate, DateR As String
Dim TableCellCount, RowNum As Integer
Dim inputdate_from, inputdate_to, inputcurr_val As Variant
inputdate_from = ThisWorkbook.Worksheets("PrimoPagina").Range("A3").Value
inputdate_to = ThisWorkbook.Worksheets("PrimoPagina").Range("E3").Value
inputcurr_val = ThisWorkbook.Worksheets("PrimoPagina").Range("B1").Value
If inputcurr_val = "BLG" Then
inputcurr_val = "R01100"
Else
inputcurr_val = "R01239" 'EUR
End If
IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=" & inputcurr_val & "&UniDbQuery.From=" & inputdate_from & "&UniDbQuery.To=" & inputdate_to
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDOC = IE.Document
Set HTMLTABLES = HTMLDOC.getElementsByClassName("data")
For Each HTMLTABLE In HTMLTABLES 'Table
For Each TableSection In HTMLTABLE.Children 'Body
For Each TableRow In TableSection.Children
RowText = ""
TableCellCount = 1
For Each TableCell In TableRow.Children
If TableCellCount = 1 Then DateR = TableCell.innerText
If TableCellCount = 2 Then Unit = TableCell.innerText
If TableCellCount = 3 Then Rate = TableCell.innerText
TableCellCount = TableCellCount + 1
Next TableCell
Debug.Print Unit, Rate
RowNum = RowNum + 1
ThisWorkbook.Worksheets("Results").Cells(RowNum, 3).Value = Unit
ThisWorkbook.Worksheets("Results").Cells(RowNum, 2).Value = Rate
ThisWorkbook.Worksheets("Results").Cells(RowNum, 1).Value = DateR
Next TableRow
Next TableSection
Next
End Sub
任何人都可以帮助更进一步吗?
我在这里所做的是,我从代码块内部给出的网站中获取了 getElementById、tagName 表名,尽管只有 class 个 div“数据”。然后,我会将所有数据(仅货币汇率和日期)放入工作表的 excel 单元格中。但它也给了我日历日,我想摆脱以 Debug.Print 模式显示的日历日。但无法找到正确的日历标签名称以将其从代码中排除。现在,我只需要摆脱日历日的帮助;代码如下
Sub gettingTablesfromCBR()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=R01100&UniDbQuery.From=07.08.2021&UniDbQuery.To=05.11.2021"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDOC = IE.Document
Set HTMLDIV = HTMLDOC.getElementById("data")
Set HTMLTABLES = HTMLDOC.getElementsByTagName("table")
Dim RowText As String
For Each HTMLTABLE In HTMLTABLES
For Each TableSection In HTMLTABLE.Children
For Each TableRow In TableSection.Children
RowText = ""
For Each TableCell In TableRow.Children
RowText = RowText & vbTab & TableCell.innerText
Next TableCell
Debug.Print , , RowText
Next TableRow
Next TableSection
Next HTMLTABLE
End Sub
更新:
您可以在最里面的 For
循环中添加一个计数器,在该循环开始之前将其重置为 1
。然后您可以测试此计数器以查看它是否是该行的第一个单元格并跳过它。
这个示例对您的代码进行了一些小的调整以仅针对 table(具有 data
的 classname
):
Sub gettingTablesfromCBR()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
Dim Unit As String
Dim Rate As String
Dim TableCellCount As Integer
IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=R01100&UniDbQuery.From=07.08.2021&UniDbQuery.To=05.11.2021"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDOC = IE.Document
Set HTMLTABLES = HTMLDOC.getElementsByClassName("data")
For Each HTMLTABLE In HTMLTABLES 'Table
For Each TableSection In HTMLTABLE.Children 'Body
For Each TableRow In TableSection.Children
RowText = ""
TableCellCount = 1
For Each TableCell In TableRow.Children
If TableCellCount = 2 Then Unit = TableCell.innerText
If TableCellCount = 3 Then Rate = TableCell.innerText
TableCellCount = TableCellCount + 1
Next TableCell
Debug.Print Unit, Rate
Next TableRow
Next TableSection
Next
End Sub
在@JNevill 的帮助下,我解决并改进了代码。现在,如果任何有兴趣的人都可以轻松地将数字作为日期,那么费率的结果将以 table 形式放入 xl sheet.
中Sub gettingTablesfromCBR2()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
Dim Unit As String
Dim Rate, DateR As String
Dim TableCellCount, RowNum As Integer
Dim inputdate_from, inputdate_to, inputcurr_val As Variant
inputdate_from = ThisWorkbook.Worksheets("PrimoPagina").Range("A3").Value
inputdate_to = ThisWorkbook.Worksheets("PrimoPagina").Range("E3").Value
inputcurr_val = ThisWorkbook.Worksheets("PrimoPagina").Range("B1").Value
If inputcurr_val = "BLG" Then
inputcurr_val = "R01100"
Else
inputcurr_val = "R01239" 'EUR
End If
IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=" & inputcurr_val & "&UniDbQuery.From=" & inputdate_from & "&UniDbQuery.To=" & inputdate_to
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDOC = IE.Document
Set HTMLTABLES = HTMLDOC.getElementsByClassName("data")
For Each HTMLTABLE In HTMLTABLES 'Table
For Each TableSection In HTMLTABLE.Children 'Body
For Each TableRow In TableSection.Children
RowText = ""
TableCellCount = 1
For Each TableCell In TableRow.Children
If TableCellCount = 1 Then DateR = TableCell.innerText
If TableCellCount = 2 Then Unit = TableCell.innerText
If TableCellCount = 3 Then Rate = TableCell.innerText
TableCellCount = TableCellCount + 1
Next TableCell
Debug.Print Unit, Rate
RowNum = RowNum + 1
ThisWorkbook.Worksheets("Results").Cells(RowNum, 3).Value = Unit
ThisWorkbook.Worksheets("Results").Cells(RowNum, 2).Value = Rate
ThisWorkbook.Worksheets("Results").Cells(RowNum, 1).Value = DateR
Next TableRow
Next TableSection
Next
End Sub