使用 VBA 抓取两个城市之间的距离
Scraping the distances between two cities using VBA
我正在尝试编写一个工具,让我的同事快速计算成对城市列表之间的距离,以完成季节性但对我们部门来说非常重要的任务。
我目前通过 Google Maps Distance API 使用它,但他们的政策和支付方式的不断变化正在变成一个真正的问题,因为我们才发现该工具已停止在我们需要使用它的时候工作。
这就是为什么我决定解决这个问题并摆脱对 API 的需求。这是我的第一个 Scraping 项目,所以我确信有更好的编码方法,但到目前为止我的解决方案是:
Sub Scrape2()
Dim IE As Object
Dim dist As Variant
Dim URL As String
Dim i As Integer
'Creates an Internet Explorer Object
Set IE = CreateObject("InternetExplorer.application")
URL = "https://www.entrecidadesdistancia.com.br"
With IE
.Visible = False ' "True" makes the object visible
.navigate URL 'Loads the website
'Waits until the site's ready
While IE.Busy
DoEvents
Wend
Do While .Busy
Loop
'Selects "origin" field and inserts text
.Document.getElementById("origem").Value = "Jandira, SP - Brasil"
'Selects "destination" field and inserts text
.Document.getElementById("destino").Value = "Cotia, SP - Brasil"
'Presses the GO button
For Each Button In .Document.getElementsByTagName("button")
Button.Click
Exit For
Next
'Waits until the site's ready
Do While .Busy
Loop
Do While .Busy
Loop
dist = .Document.getElementById("distanciarota").innerText
MsgBox (dist)
End With
IE.Quit
Set IE = Nothing
End Sub
它打开一个 Internet Explorer 对象,将两个城市(我最终将用我的工具中的信息替换)插入到正确的字段中,点击“开始”,加载下一页,并且应该输入我需要的数字一个消息框(当我开始工作时,我将用目标单元格替换它)。
我的最后一个问题是,有一半时间,宏会停止并在这一行声明 "Run-time error '424': Object required":
.Document.getElementById("origem").Value = "Jandira, SP - Brasil"
或在这一行:
dist = .Document.getElementById("distanciarota").innerText
我设法通过在 "problem" 行之前插入另一个等待时间来解决这个问题,但这确实比我想要的更慢了宏。
不过,现在它总是会到达末尾行,但是当它到达时,我的 MessageBox 会变成空白。
这是我需要的信息:
<strong id="distanciarota">12.4 km</strong>
来自这个网站:https://www.entrecidadesdistancia.com.br/calcular-distancia/calcular-distancia.jsp
任何帮助将其放入变量或工作表单元格的人都将非常感激。
这将使用它们的 id 获取两个距离测量值。我添加了一个带超时的循环以允许页面更新。
Option Explicit
Public Sub GetInfo()
Dim ie As New InternetExplorer, t As Date, ele As Object, test As String
Const MAX_WAIT_SEC As Long = 5 '<5 seconds
With ie
.Visible = True
.navigate "https://www.entrecidadesdistancia.com.br"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.getElementById("origem").Value = "Jandira, SP - Brasil"
.getElementById("destino").Value = "Cotia, SP - Brasil"
.querySelector("[onclick='setRout();']").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
DoEvents
On Error Resume Next
Set ele = ie.document.getElementById("distanciarota")
test = ele.innerText
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While test = vbNullString
If Not ele Is Nothing Then
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1) = "rodovias " & ele.innerText
.Cells(2, 1) = "linha reta " & ie.document.getElementById("kmlinhareta").innerText
End With
End If
.Quit
End With
End Sub
您可以将 querySelector 与 CSS id、#
、选择器以相同的方式使用,例如
ie.document.querySelector("#distanciarota").innerText
#If VBA7 Then
Public Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As LongPtr) 'For 64 Bit Systems
#Else
Public Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds as Long) 'For 32 Bit Systems
#End If
Sub Scrape2()
Dim IE As Object
Dim dist As Variant
Dim URL As String
Dim i As Integer
'Creates an Internet Explorer Object
Set IE = CreateObject("InternetExplorer.application")
URL = "https://www.entrecidadesdistancia.com.br"
With IE
.Visible = False ' "True" makes the object visible
.navigate URL 'Loads the website
'Waits until the site's ready
While IE.Busy
DoEvents
Wend
Do While .Busy
Loop
'Add additional delay of 500 milliseconds
Sleep 500
'Selects "origin" field and inserts text
.Document.getElementById("origem").Value = "Jandira, SP - Brasil"
'Selects "destination" field and inserts text
.Document.getElementById("destino").Value = "Cotia, SP - Brasil"
'Presses the GO button
For Each Button In .Document.getElementsByTagName("button")
Button.Click
Exit For
Next
'Waits until the site's ready
Do While .Busy
Loop
Do While .Busy
Loop
'Add additional delay of 500 milliseconds
Sleep 500
dist = .Document.getElementById("distanciarota").innerText
MsgBox (dist)
End With
IE.Quit
Set IE = Nothing
End Sub
'请在导航和单击按钮后额外延迟。 Ie.busy 对象在与服务器交互期间处于活动状态。但是,从服务器浏览器提取数据后需要几毫秒来呈现 html 内容。因此,增加额外的延迟是避免这些错误的最佳做法。
我正在尝试编写一个工具,让我的同事快速计算成对城市列表之间的距离,以完成季节性但对我们部门来说非常重要的任务。
我目前通过 Google Maps Distance API 使用它,但他们的政策和支付方式的不断变化正在变成一个真正的问题,因为我们才发现该工具已停止在我们需要使用它的时候工作。
这就是为什么我决定解决这个问题并摆脱对 API 的需求。这是我的第一个 Scraping 项目,所以我确信有更好的编码方法,但到目前为止我的解决方案是:
Sub Scrape2()
Dim IE As Object
Dim dist As Variant
Dim URL As String
Dim i As Integer
'Creates an Internet Explorer Object
Set IE = CreateObject("InternetExplorer.application")
URL = "https://www.entrecidadesdistancia.com.br"
With IE
.Visible = False ' "True" makes the object visible
.navigate URL 'Loads the website
'Waits until the site's ready
While IE.Busy
DoEvents
Wend
Do While .Busy
Loop
'Selects "origin" field and inserts text
.Document.getElementById("origem").Value = "Jandira, SP - Brasil"
'Selects "destination" field and inserts text
.Document.getElementById("destino").Value = "Cotia, SP - Brasil"
'Presses the GO button
For Each Button In .Document.getElementsByTagName("button")
Button.Click
Exit For
Next
'Waits until the site's ready
Do While .Busy
Loop
Do While .Busy
Loop
dist = .Document.getElementById("distanciarota").innerText
MsgBox (dist)
End With
IE.Quit
Set IE = Nothing
End Sub
它打开一个 Internet Explorer 对象,将两个城市(我最终将用我的工具中的信息替换)插入到正确的字段中,点击“开始”,加载下一页,并且应该输入我需要的数字一个消息框(当我开始工作时,我将用目标单元格替换它)。
我的最后一个问题是,有一半时间,宏会停止并在这一行声明 "Run-time error '424': Object required":
.Document.getElementById("origem").Value = "Jandira, SP - Brasil"
或在这一行:
dist = .Document.getElementById("distanciarota").innerText
我设法通过在 "problem" 行之前插入另一个等待时间来解决这个问题,但这确实比我想要的更慢了宏。
不过,现在它总是会到达末尾行,但是当它到达时,我的 MessageBox 会变成空白。
这是我需要的信息:
<strong id="distanciarota">12.4 km</strong>
来自这个网站:https://www.entrecidadesdistancia.com.br/calcular-distancia/calcular-distancia.jsp
任何帮助将其放入变量或工作表单元格的人都将非常感激。
这将使用它们的 id 获取两个距离测量值。我添加了一个带超时的循环以允许页面更新。
Option Explicit
Public Sub GetInfo()
Dim ie As New InternetExplorer, t As Date, ele As Object, test As String
Const MAX_WAIT_SEC As Long = 5 '<5 seconds
With ie
.Visible = True
.navigate "https://www.entrecidadesdistancia.com.br"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.getElementById("origem").Value = "Jandira, SP - Brasil"
.getElementById("destino").Value = "Cotia, SP - Brasil"
.querySelector("[onclick='setRout();']").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
DoEvents
On Error Resume Next
Set ele = ie.document.getElementById("distanciarota")
test = ele.innerText
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While test = vbNullString
If Not ele Is Nothing Then
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1) = "rodovias " & ele.innerText
.Cells(2, 1) = "linha reta " & ie.document.getElementById("kmlinhareta").innerText
End With
End If
.Quit
End With
End Sub
您可以将 querySelector 与 CSS id、#
、选择器以相同的方式使用,例如
ie.document.querySelector("#distanciarota").innerText
#If VBA7 Then
Public Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As LongPtr) 'For 64 Bit Systems
#Else
Public Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds as Long) 'For 32 Bit Systems
#End If
Sub Scrape2()
Dim IE As Object
Dim dist As Variant
Dim URL As String
Dim i As Integer
'Creates an Internet Explorer Object
Set IE = CreateObject("InternetExplorer.application")
URL = "https://www.entrecidadesdistancia.com.br"
With IE
.Visible = False ' "True" makes the object visible
.navigate URL 'Loads the website
'Waits until the site's ready
While IE.Busy
DoEvents
Wend
Do While .Busy
Loop
'Add additional delay of 500 milliseconds
Sleep 500
'Selects "origin" field and inserts text
.Document.getElementById("origem").Value = "Jandira, SP - Brasil"
'Selects "destination" field and inserts text
.Document.getElementById("destino").Value = "Cotia, SP - Brasil"
'Presses the GO button
For Each Button In .Document.getElementsByTagName("button")
Button.Click
Exit For
Next
'Waits until the site's ready
Do While .Busy
Loop
Do While .Busy
Loop
'Add additional delay of 500 milliseconds
Sleep 500
dist = .Document.getElementById("distanciarota").innerText
MsgBox (dist)
End With
IE.Quit
Set IE = Nothing
End Sub
'请在导航和单击按钮后额外延迟。 Ie.busy 对象在与服务器交互期间处于活动状态。但是,从服务器浏览器提取数据后需要几毫秒来呈现 html 内容。因此,增加额外的延迟是避免这些错误的最佳做法。