将 Web 浏览器自动化转换为 XmlHTTPRequest
Converting Web Browser Automation to XmlHTTP Request
我创建了一个从 Brief profiles (BP)
抓取相关信息的宏,可以在以下位置搜索:https://echa.europa.eu/information-on-chemicals
这可以使用对 Brief Profile
的 URL 的 XMLHTTP 请求,并且工作正常。
我现在想创建一个宏来搜索同一个网站以找到简介的 URL(href)。
作为 VBA 的初学者,我已经使用浏览器成功实现了这一点,但我希望将其转换为 XML HTTP 请求以提高效率。
使用 IE 浏览器自动化:
Sub Gethref()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLButtom As MSHTML.IHTMLElement
Dim HTMLhref As MSHTML.IHTMLElement
'Go to Website
IE.Visible = True
IE.navigate "https://echa.europa.eu/information-on-chemicals"
'Check Website is ready for search and set HTMLDoc to IE.Document for elements
Do While IE.readyState <> READYSTATE_COMPLETE
Loop
Set HTMLDoc = IE.document
'Set value of Searchbox to keyword
Set HTMLInput = HTMLDoc.getElementById("_disssimplesearch_WAR_disssearchportlet_sskeywordKey")
HTMLInput.Value = "Potassium mercaptoacetate"
'Search for Result
Set HTMLButton = HTMLDoc.getElementById("_disssimplesearchhomepage_WAR_disssearchportlet_searchButton")
HTMLButton.Click
'Check page has loaded
Do While IE.readyState = READYSTATE_COMPLETE or IE.Busy
Loop
Set HTMLDoc = IE.document
'Find Desired href
Set HTMLhref = HTMLDoc.getElementsByClassName("briefProfileLink")(0)
Debug.Print HTMLhref.getAttribute("href")
End Sub
这应该将 Potassium mercaptoacetate
的 href 打印为 https://echa.europa.eu/brief-profile/-/briefprofile/100.000.602
我已经开始尝试使用 XML HTTP 尽可能多地转换,但我 运行 转换成我不太了解的问题
使用 XML HTTP 请求(不工作)
Sub Gethref()
Dim XMLPage As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLButtom As MSHTML.IHTMLElement
Dim HTMLhref As MSHTML.IHTMLElement
'Go to Website
XMLPage.Open "GET", "https://echa.europa.eu/information-on-chemicals", False
XMLPage.send
'Set value of Searchbox to keyword
Set HTMLInput = HTMLDoc.getElementById("_disssimplesearch_WAR_disssearchportlet_sskeywordKey")
HTMLInput.Value
'Search for Result
Set HTMLButton = HTMLDoc.getElementById("_disssimplesearchhomepage_WAR_disssearchportlet_searchButton")
HTMLButton.Click
'Check page has loaded
HTMLDoc.body.innerHTML = IE.document.responseText
'Find Desired href
Set HTMLhref = HTMLDoc.getElementsByClassName("briefProfileLink")(0)
Debug.Print HTMLhref.getAttribute("href")
End Sub
我会在这方面取得进展时进行更新,但如果有人能提供帮助,那就太好了。
好的,这应该可以了。结果表明,您需要使用适当的参数发出 post HTTP 请求,以获得包含所需 links 的所需响应。
Public Sub GetContent()
Const Url = "https://echa.europa.eu/search-for-chemicals?p_auth=5ayUnMyz&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
Dim oHttp As Object, oHtml As HTMLDocument, MyDict As Object, I&, R&
Dim DictKey As Variant, payload$, searchKeyword$, Ws As Worksheet
Set oHtml = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
Set Ws = ThisWorkbook.Worksheets("Sheet1")
searchKeyword = "Acetone"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_formDate") = "1621017052777" 'timestamp
MyDict("_disssimplesearch_WAR_disssearchportlet_searchOccurred") = "true"
MyDict("_disssimplesearch_WAR_disssearchportlet_sskeywordKey") = searchKeyword
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = IIf(Len(DictKey) = 0, WorksheetFunction.encodeURL(DictKey) & "=" & WorksheetFunction.encodeURL(MyDict(DictKey)), _
payload & "&" & WorksheetFunction.encodeURL(DictKey) & "=" & WorksheetFunction.encodeURL(MyDict(DictKey)))
Next DictKey
With oHttp
.Open "POST", Url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send (payload)
oHtml.body.innerHTML = .responseText
End With
With oHtml.querySelectorAll("table.table > tbody > tr > td > a.substanceNameLink")
For I = 0 To .Length - 1
R = R + 1: Ws.Cells(R, 1) = .item(I).getAttribute("href")
Next I
End With
End Sub
如果您只对第一个 link 感兴趣,请尝试以下而不是最后一个 with 块:
MsgBox oHtml.querySelector("table.table > tbody > tr > td > a.substanceNameLink").getAttribute("href")
或者您可以直接从开发工具中复制这些参数并使用它们:
Public Sub GetContent()
Const Url = "https://echa.europa.eu/search-for-chemicals?"
Dim oHttp As Object, oHtml As HTMLDocument
Dim payload$, Ws As Worksheet, urlSuffix$
Set oHtml = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set Ws = ThisWorkbook.Worksheets("Sheet1")
urlSuffix = "p_auth=69hDou3E&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=" & _
"_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=" & _
"doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals" & _
"%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview" & _
"%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
payload = "_disssimplesearchhomepage_WAR_disssearchportlet_formDate=1621042609544&_disssimplesearch_WAR_disssearchportlet_searchOccurred=" & _
"true&_disssimplesearch_WAR_disssearchportlet_sskeywordKey=Acetone&_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer" & _
"=true&_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox=on"
With oHttp
.Open "POST", Url & urlSuffix, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send (payload)
oHtml.body.innerHTML = .responseText
End With
Debug.Print oHtml.querySelector("table.table > tbody > tr > td > a.substanceNameLink").getAttribute("href")
End Sub
我创建了一个从 Brief profiles (BP)
抓取相关信息的宏,可以在以下位置搜索:https://echa.europa.eu/information-on-chemicals
这可以使用对 Brief Profile
的 URL 的 XMLHTTP 请求,并且工作正常。
我现在想创建一个宏来搜索同一个网站以找到简介的 URL(href)。
作为 VBA 的初学者,我已经使用浏览器成功实现了这一点,但我希望将其转换为 XML HTTP 请求以提高效率。
使用 IE 浏览器自动化:
Sub Gethref()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLButtom As MSHTML.IHTMLElement
Dim HTMLhref As MSHTML.IHTMLElement
'Go to Website
IE.Visible = True
IE.navigate "https://echa.europa.eu/information-on-chemicals"
'Check Website is ready for search and set HTMLDoc to IE.Document for elements
Do While IE.readyState <> READYSTATE_COMPLETE
Loop
Set HTMLDoc = IE.document
'Set value of Searchbox to keyword
Set HTMLInput = HTMLDoc.getElementById("_disssimplesearch_WAR_disssearchportlet_sskeywordKey")
HTMLInput.Value = "Potassium mercaptoacetate"
'Search for Result
Set HTMLButton = HTMLDoc.getElementById("_disssimplesearchhomepage_WAR_disssearchportlet_searchButton")
HTMLButton.Click
'Check page has loaded
Do While IE.readyState = READYSTATE_COMPLETE or IE.Busy
Loop
Set HTMLDoc = IE.document
'Find Desired href
Set HTMLhref = HTMLDoc.getElementsByClassName("briefProfileLink")(0)
Debug.Print HTMLhref.getAttribute("href")
End Sub
这应该将 Potassium mercaptoacetate
的 href 打印为 https://echa.europa.eu/brief-profile/-/briefprofile/100.000.602
我已经开始尝试使用 XML HTTP 尽可能多地转换,但我 运行 转换成我不太了解的问题
使用 XML HTTP 请求(不工作)
Sub Gethref()
Dim XMLPage As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLButtom As MSHTML.IHTMLElement
Dim HTMLhref As MSHTML.IHTMLElement
'Go to Website
XMLPage.Open "GET", "https://echa.europa.eu/information-on-chemicals", False
XMLPage.send
'Set value of Searchbox to keyword
Set HTMLInput = HTMLDoc.getElementById("_disssimplesearch_WAR_disssearchportlet_sskeywordKey")
HTMLInput.Value
'Search for Result
Set HTMLButton = HTMLDoc.getElementById("_disssimplesearchhomepage_WAR_disssearchportlet_searchButton")
HTMLButton.Click
'Check page has loaded
HTMLDoc.body.innerHTML = IE.document.responseText
'Find Desired href
Set HTMLhref = HTMLDoc.getElementsByClassName("briefProfileLink")(0)
Debug.Print HTMLhref.getAttribute("href")
End Sub
我会在这方面取得进展时进行更新,但如果有人能提供帮助,那就太好了。
好的,这应该可以了。结果表明,您需要使用适当的参数发出 post HTTP 请求,以获得包含所需 links 的所需响应。
Public Sub GetContent()
Const Url = "https://echa.europa.eu/search-for-chemicals?p_auth=5ayUnMyz&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
Dim oHttp As Object, oHtml As HTMLDocument, MyDict As Object, I&, R&
Dim DictKey As Variant, payload$, searchKeyword$, Ws As Worksheet
Set oHtml = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
Set Ws = ThisWorkbook.Worksheets("Sheet1")
searchKeyword = "Acetone"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_formDate") = "1621017052777" 'timestamp
MyDict("_disssimplesearch_WAR_disssearchportlet_searchOccurred") = "true"
MyDict("_disssimplesearch_WAR_disssearchportlet_sskeywordKey") = searchKeyword
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = IIf(Len(DictKey) = 0, WorksheetFunction.encodeURL(DictKey) & "=" & WorksheetFunction.encodeURL(MyDict(DictKey)), _
payload & "&" & WorksheetFunction.encodeURL(DictKey) & "=" & WorksheetFunction.encodeURL(MyDict(DictKey)))
Next DictKey
With oHttp
.Open "POST", Url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send (payload)
oHtml.body.innerHTML = .responseText
End With
With oHtml.querySelectorAll("table.table > tbody > tr > td > a.substanceNameLink")
For I = 0 To .Length - 1
R = R + 1: Ws.Cells(R, 1) = .item(I).getAttribute("href")
Next I
End With
End Sub
如果您只对第一个 link 感兴趣,请尝试以下而不是最后一个 with 块:
MsgBox oHtml.querySelector("table.table > tbody > tr > td > a.substanceNameLink").getAttribute("href")
或者您可以直接从开发工具中复制这些参数并使用它们:
Public Sub GetContent()
Const Url = "https://echa.europa.eu/search-for-chemicals?"
Dim oHttp As Object, oHtml As HTMLDocument
Dim payload$, Ws As Worksheet, urlSuffix$
Set oHtml = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set Ws = ThisWorkbook.Worksheets("Sheet1")
urlSuffix = "p_auth=69hDou3E&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=" & _
"_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=" & _
"doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals" & _
"%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview" & _
"%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
payload = "_disssimplesearchhomepage_WAR_disssearchportlet_formDate=1621042609544&_disssimplesearch_WAR_disssearchportlet_searchOccurred=" & _
"true&_disssimplesearch_WAR_disssearchportlet_sskeywordKey=Acetone&_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer" & _
"=true&_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox=on"
With oHttp
.Open "POST", Url & urlSuffix, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send (payload)
oHtml.body.innerHTML = .responseText
End With
Debug.Print oHtml.querySelector("table.table > tbody > tr > td > a.substanceNameLink").getAttribute("href")
End Sub