如何使用 excel vba 获取 href 属性
How to get the href attribute using excel vba
我想获取 HTML 页面中 <h3> 标签内链接(<a>)的 href 属性。我可以获取 innerText,但不知道如何访问 href 属性。文档中有多个 <h3> 标签,但目前我只需要第一个,其余的以后再处理……
这是我目前得到的代码
Sub Scrap()
    ' Opens the job-search page in a visible Internet Explorer window and
    ' reads the trimmed text of the first <h3> element into a local string.
    Dim IE As New InternetExplorer
    Dim Doc As HTMLDocument
    Dim firstHeading As Object
    Dim headingText As String

    IE.Visible = True
    IE.navigate "https://www.oneoiljobsearch.com/senior-reservoir-engineer-jobs/?page=1"

    ' Yield to pending events at least once, then keep yielding until the
    ' browser reports the page as completely loaded.
    Do
        DoEvents
    Loop While IE.readyState <> READYSTATE_COMPLETE

    Set Doc = IE.document
    Set firstHeading = Doc.getElementsByTagName("h3")(0)
    headingText = Trim(firstHeading.innerText)
    ' For the page excerpt in the question, headingText now holds
    ' "Senior Reservoir Engineer".
End Sub
下面是要从中提取数据的 HTML 文档的一部分:
<div class="front_job_details">
<h3>
<a href="/jobs/senior-reservoir-engineer-oslo-norway-7?cmp=js&from=job-search-form-2" target="_blank">
Senior Reservoir Engineer
</a>
</h3>
我需要检索的文本是:“/jobs/senior-reservoir-engineer-oslo-norway-7?cmp=js&from=job-search-form-2”
在此先感谢您的帮助。
试试,
' Take the first <h3>, then the first <a> inside it, and read that anchor's href.
' Doc is the HTMLDocument obtained from IE.document.
Dim firstAnchor As Object
Dim hr As String
Set firstAnchor = Doc.getElementsByTagName("h3")(0).getElementsByTagName("a")(0)
hr = firstAnchor.href
Debug.Print hr
getElementsByTagName 返回的集合索引从 0 开始,而 .Length(即 <h3> 的个数,在其他对象中称为 Count)是从 1 开始计数的元素总数,因此最后一个有效索引是 Length - 1。
' Print the href of the first <a> inside every <h3> of Doc.
' The collection is fetched once up front instead of being queried again for
' the loop bound and on every pass.
Dim i As Long
Dim h3s As Object
Set h3s = Doc.getElementsByTagName("h3")
' Indexes are zero-based, so the last valid index is Length - 1.
For i = 0 To h3s.Length - 1
    Debug.Print h3s(i).getElementsByTagName("a")(0).href
Next i
这会获取每个 <h3> 中的第一个 <a> 标签。您可以沿用同样的方法,从每个 <h3> 中获取多个 <a>。
下面是最终代码,以防对某人有帮助...
Sub MultiScrap()
    ' Loads pages lval..uval of a site in a hidden Internet Explorer instance
    ' and, for every <h3> that contains a link, writes one row to sheet "LNK0":
    '   column A - running number (1, 2, 3, ...)
    '   column B - page number
    '   column C - zero-based index of the <h3> within the page
    '   column D - href of the first <a> in the <h3>, as a clickable hyperlink
    '
    ' Inputs read from sheet "LNK0":
    '   B1 - root of the http address; the page number is appended to it
    '   B2 - first page number to load
    '   B3 - last page number to load
    Const FIRST_OUTPUT_ROW As Long = 5

    Dim IE As New InternetExplorer
    Dim Doc As HTMLDocument
    Dim ws As Worksheet
    Dim headings As IHTMLElementCollection
    Dim anchors As IHTMLElementCollection
    ' Every variable needs its own As clause; a bare name in a Dim list is a Variant.
    Dim i As Long, j As Long, s As Long
    Dim lval As Long, uval As Long
    Dim myHTTP As String
    Dim link As String

    Set ws = Worksheets("LNK0")
    ws.Activate

    myHTTP = ws.Cells(1, 2).Value   'http address root
    lval = ws.Cells(2, 2).Value     'min number to add to root (page=1..)
    uval = ws.Cells(3, 2).Value     'max number to add to root (page=10..)

    s = FIRST_OUTPUT_ROW
    For i = lval To uval            'loop through all pages
        IE.navigate myHTTP & i

        ' Checking Busy as well guards against readyState still reporting the
        ' previous page as complete right after navigate is called.
        Do
            DoEvents
        Loop While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE

        Set Doc = IE.document
        Set headings = Doc.getElementsByTagName("h3")   'queried once per page

        For j = 0 To headings.Length - 1
            Set anchors = headings.Item(j).getElementsByTagName("a")
            ' Headings without a link are skipped rather than raising a
            ' runtime error on a missing anchor.
            If anchors.Length > 0 Then
                link = anchors.Item(0).href

                ws.Cells(s, 1).Value = s - FIRST_OUTPUT_ROW + 1   'Correl
                ws.Cells(s, 2).Value = i                          'Page
                ws.Cells(s, 3).Value = j                          'Row in page
                ' TextToDisplay also becomes the visible text of the cell.
                ws.Hyperlinks.Add _
                    Anchor:=ws.Cells(s, 4), _
                    Address:=link, _
                    TextToDisplay:=link                           'Hyperlink
                s = s + 1
            End If
        Next j
    Next i

    ' The browser was never made visible, so close it explicitly; otherwise a
    ' hidden Internet Explorer process stays behind after the macro ends.
    IE.Quit

    MsgBox "Dishes ready Sir!"
End Sub
我会使用下面这种更可靠的 CSS 选择器方法,获取 class 为 front_job_details 的元素中的所有 href:
Option Explicit
Public Sub GetLinks()
    ' Loads the job-search page in a visible Internet Explorer window, prints
    ' every node matched by the CSS selector ".front_job_details [href]" to the
    ' Immediate window, and then closes the browser.
    Dim ie As New InternetExplorer
    Dim matches As Object
    Dim idx As Long

    ie.Visible = True
    ie.navigate "https://www.oneoiljobsearch.com/senior-reservoir-engineer-jobs/?page=1"

    ' Keep yielding to pending events while the browser is busy or its
    ' readyState is still below 4.
    Do While ie.Busy Or ie.readyState < 4
        DoEvents
    Loop

    ' The selector matches any descendant of an element with class
    ' front_job_details that carries an href attribute.
    Set matches = ie.document.querySelectorAll(".front_job_details [href]")
    For idx = 0 To matches.Length - 1
        Debug.Print matches.item(idx)
    Next idx

    Stop '<=delete me after
    'other stuff
    ie.Quit
End Sub
我想获取 HTML 页面中 <h3> 标签内链接(<a>)的 href 属性。我可以获取 innerText,但不知道如何访问 href 属性。文档中有多个 <h3> 标签,但目前我只需要第一个,其余的以后再处理……
这是我目前得到的代码
Sub Scrap()
    ' Opens the job-search page in a visible Internet Explorer window and
    ' reads the trimmed text of the first <h3> element into a local string.
    Dim IE As New InternetExplorer
    Dim Doc As HTMLDocument
    Dim firstHeading As Object
    Dim headingText As String

    IE.Visible = True
    IE.navigate "https://www.oneoiljobsearch.com/senior-reservoir-engineer-jobs/?page=1"

    ' Yield to pending events at least once, then keep yielding until the
    ' browser reports the page as completely loaded.
    Do
        DoEvents
    Loop While IE.readyState <> READYSTATE_COMPLETE

    Set Doc = IE.document
    Set firstHeading = Doc.getElementsByTagName("h3")(0)
    headingText = Trim(firstHeading.innerText)
    ' For the page excerpt in the question, headingText now holds
    ' "Senior Reservoir Engineer".
End Sub
下面是要从中提取数据的 HTML 文档的一部分:
<div class="front_job_details">
<h3>
<a href="/jobs/senior-reservoir-engineer-oslo-norway-7?cmp=js&from=job-search-form-2" target="_blank">
Senior Reservoir Engineer
</a>
</h3>
我需要检索的文本是:“/jobs/senior-reservoir-engineer-oslo-norway-7?cmp=js&from=job-search-form-2”
在此先感谢您的帮助。
试试,
' Take the first <h3>, then the first <a> inside it, and read that anchor's href.
' Doc is the HTMLDocument obtained from IE.document.
Dim firstAnchor As Object
Dim hr As String
Set firstAnchor = Doc.getElementsByTagName("h3")(0).getElementsByTagName("a")(0)
hr = firstAnchor.href
Debug.Print hr
getElementsByTagName 返回的集合索引从 0 开始,而 .Length(即 <h3> 的个数,在其他对象中称为 Count)是从 1 开始计数的元素总数,因此最后一个有效索引是 Length - 1。
' Print the href of the first <a> inside every <h3> of Doc.
' The collection is fetched once up front instead of being queried again for
' the loop bound and on every pass.
Dim i As Long
Dim h3s As Object
Set h3s = Doc.getElementsByTagName("h3")
' Indexes are zero-based, so the last valid index is Length - 1.
For i = 0 To h3s.Length - 1
    Debug.Print h3s(i).getElementsByTagName("a")(0).href
Next i
这会获取每个 <h3> 中的第一个 <a> 标签。您可以沿用同样的方法,从每个 <h3> 中获取多个 <a>。
下面是最终代码,以防对某人有帮助...
Sub MultiScrap()
    ' Loads pages lval..uval of a site in a hidden Internet Explorer instance
    ' and, for every <h3> that contains a link, writes one row to sheet "LNK0":
    '   column A - running number (1, 2, 3, ...)
    '   column B - page number
    '   column C - zero-based index of the <h3> within the page
    '   column D - href of the first <a> in the <h3>, as a clickable hyperlink
    '
    ' Inputs read from sheet "LNK0":
    '   B1 - root of the http address; the page number is appended to it
    '   B2 - first page number to load
    '   B3 - last page number to load
    Const FIRST_OUTPUT_ROW As Long = 5

    Dim IE As New InternetExplorer
    Dim Doc As HTMLDocument
    Dim ws As Worksheet
    Dim headings As IHTMLElementCollection
    Dim anchors As IHTMLElementCollection
    ' Every variable needs its own As clause; a bare name in a Dim list is a Variant.
    Dim i As Long, j As Long, s As Long
    Dim lval As Long, uval As Long
    Dim myHTTP As String
    Dim link As String

    Set ws = Worksheets("LNK0")
    ws.Activate

    myHTTP = ws.Cells(1, 2).Value   'http address root
    lval = ws.Cells(2, 2).Value     'min number to add to root (page=1..)
    uval = ws.Cells(3, 2).Value     'max number to add to root (page=10..)

    s = FIRST_OUTPUT_ROW
    For i = lval To uval            'loop through all pages
        IE.navigate myHTTP & i

        ' Checking Busy as well guards against readyState still reporting the
        ' previous page as complete right after navigate is called.
        Do
            DoEvents
        Loop While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE

        Set Doc = IE.document
        Set headings = Doc.getElementsByTagName("h3")   'queried once per page

        For j = 0 To headings.Length - 1
            Set anchors = headings.Item(j).getElementsByTagName("a")
            ' Headings without a link are skipped rather than raising a
            ' runtime error on a missing anchor.
            If anchors.Length > 0 Then
                link = anchors.Item(0).href

                ws.Cells(s, 1).Value = s - FIRST_OUTPUT_ROW + 1   'Correl
                ws.Cells(s, 2).Value = i                          'Page
                ws.Cells(s, 3).Value = j                          'Row in page
                ' TextToDisplay also becomes the visible text of the cell.
                ws.Hyperlinks.Add _
                    Anchor:=ws.Cells(s, 4), _
                    Address:=link, _
                    TextToDisplay:=link                           'Hyperlink
                s = s + 1
            End If
        Next j
    Next i

    ' The browser was never made visible, so close it explicitly; otherwise a
    ' hidden Internet Explorer process stays behind after the macro ends.
    IE.Quit

    MsgBox "Dishes ready Sir!"
End Sub
我会使用下面这种更可靠的 CSS 选择器方法,获取 class 为 front_job_details 的元素中的所有 href:
Option Explicit
Public Sub GetLinks()
    ' Loads the job-search page in a visible Internet Explorer window, prints
    ' every node matched by the CSS selector ".front_job_details [href]" to the
    ' Immediate window, and then closes the browser.
    Dim ie As New InternetExplorer
    Dim matches As Object
    Dim idx As Long

    ie.Visible = True
    ie.navigate "https://www.oneoiljobsearch.com/senior-reservoir-engineer-jobs/?page=1"

    ' Keep yielding to pending events while the browser is busy or its
    ' readyState is still below 4.
    Do While ie.Busy Or ie.readyState < 4
        DoEvents
    Loop

    ' The selector matches any descendant of an element with class
    ' front_job_details that carries an href attribute.
    Set matches = ie.document.querySelectorAll(".front_job_details [href]")
    For idx = 0 To matches.Length - 1
        Debug.Print matches.item(idx)
    Next idx

    Stop '<=delete me after
    'other stuff
    ie.Quit
End Sub