Sub 和 Function 独立工作但不一起工作

Sub and Function Work Independently but not together

这个问题是我发布的尝试和 webscrape brief profiles of https://echa.europa.eu/information-on-chemicals

的一个小系列的一部分

该代码使用 Public 函数 GetUrl() 检索所需简要资料的 url。然后使用 SubRoutine GetContents() 来抓取所需的物理和化学特性数据。

令人费解的是,我得到一个 运行时间错误 91。这很奇怪,因为 GetContents() 和 GetUrl() 在彼此独立的情况下工作。

有没有人不介意看一看那会很棒。

        Sub GetContents()

Dim xmlReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
            
Dim SubSectList As MSHTML.IHTMLElement
Dim SubSects As MSHTML.IHTMLElementCollection
Dim SubSect As MSHTML.IHTMLElement
                    
Url = GetUrl()
                    
xmlReq.Open "Get", Url, False
xmlReq.send
            
           
If xmlReq.Status <> 200 Then
            
MsgBox "Problem" & vbNewLine & xmlReq.Status & " - " & xmlReq.statusText
Exit Sub

End If
            
HTMLDoc.body.innerHTML = xmlReq.responseText
            
Set SubSectList = HTMLDoc.getElementsByClassName("col-xs-12 col-lg-10 MainContent")(1)
            
Set SubSects = SubSectList.getElementsByTagName("dt")
              
For Each SubSect In SubSects
Debug.Print SubSect.innerText & " : "; SubSect.NextSibling.innerText
Next SubSect

End Sub


Public Function GetUrl() As String
        Const Url = "https://echa.europa.eu/search-for-chemicals?p_auth=5ayUnMyz&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
        Dim oHttp As Object, oHtml As HTMLDocument, MyDict As Object, I&, R&
        Dim DictKey As Variant, payload$, searchKeyword$, Ws As Worksheet
        
        Set oHtml = New HTMLDocument
        Set oHttp = CreateObject("MSXML2.XMLHTTP")
        Set MyDict = CreateObject("Scripting.Dictionary")
        Set Ws = ThisWorkbook.Worksheets("Sheet1")
    
'Keyword can Be any chemical usually set to a cell value i.e. Range("a1").Value
        searchKeyword = "Acetone" '
        
        MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_formDate") = "1621017052777" 'timestamp
        MyDict("_disssimplesearch_WAR_disssearchportlet_searchOccurred") = "true"
        MyDict("_disssimplesearch_WAR_disssearchportlet_sskeywordKey") = searchKeyword
        MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
        MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
    
        payload = ""
        For Each DictKey In MyDict
            payload = IIf(Len(DictKey) = 0, WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey)), _
            payload & "&" & WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey)))
        Next DictKey
        
        With oHttp
            .Open "POST", Url, False
            .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
            .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
            .send (payload)
            oHtml.body.innerHTML = .responseText
        End With
        
        Debug.Print oHtml.querySelector("table.table > tbody > tr > td > a.substanceNameLink").getAttribute("href")
        GetUrl = oHtml.querySelector("table.table > tbody > tr > td > a.substanceNameLink").getAttribute("href")
    
    End Function

参考文献:

更新:特别奇怪的是,当在代码中给定一个确切的 url 时,以下代码有效:

Sub GetContents()

Dim xmlReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
            
Dim SubSectList As MSHTML.IHTMLElement
Dim SubSects As MSHTML.IHTMLElementCollection
Dim SubSect As MSHTML.IHTMLElement
                    
xmlReq.Open "Get", "https://echa.europa.eu/brief-profile/-/briefprofile/100.028.723", False
xmlReq.send
            
            
If xmlReq.Status <> 200 Then
            
MsgBox "Problem" & vbNewLine & xmlReq.Status & " - " & xmlReq.statusText
Exit Sub

End If
            
HTMLDoc.body.innerHTML = xmlReq.responseText
            
Set SubSectList = HTMLDoc.getElementsByClassName("col-xs-12 col-lg-10 MainContent")(1)
            
Set SubSects = SubSectList.getElementsByTagName("dt")

            
For Each SubSect In SubSects
Debug.Print SubSect.innerText & " : "; SubSect.NextSibling.innerText
Next SubSect

End Sub

然而替换

xmlReq.Open "Get", "https://echa.europa.eu/brief-profile/-/briefprofile/100.028.723", False
xmlReq.send

Url = GetUrl()
xmlReq.Open "Get", url, False
    xmlReq.send

其中 GetUrl() 指的是上述工作 Public 函数

导致代码中断.. 在调试时将 Set SubSects = SubSectList.getElementsByTagName("dt") 作为有问题的行。

当答案中提供的代码为运行时更新结果截图:

您正在提取错误的 url,并且该 URI 的 html 中没有 dt 元素。更改css选择器,简化如下:

Option Explicit

Public Sub GetContents()
    Dim XMLReq As New MSXML2.XMLHTTP60
    Dim HTMLDoc As New MSHTML.HTMLDocument
        
    XMLReq.Open "Get", GetUrl, False
    XMLReq.send
                  
    HTMLDoc.body.innerHTML = XMLReq.responseText
    
    Dim i As Long
    
    With HTMLDoc.querySelectorAll(".EndpointContent dt")
        For i = 0 To .Length - 1
            Debug.Print .Item(i).innerText & " : " & .Item(i).NextSibling.NextSibling.innerText
        Next
    End With
End Sub

Public Function GetUrl() As String
    Const url = "https://echa.europa.eu/search-for-chemicals?p_auth=5ayUnMyz&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
    Dim oHttp As Object, oHtml As HTMLDocument, MyDict As Object, i&, R&
    Dim DictKey As Variant, payload$, searchKeyword$, Ws As Worksheet
        
    Set oHtml = New HTMLDocument
    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    Set MyDict = CreateObject("Scripting.Dictionary")
    Set Ws = ThisWorkbook.Worksheets("Sheet1")
    
    'Keyword can Be any chemical usually set to a cell value i.e. Range("a1").Value
    searchKeyword = "Acetone"                    '
        
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_formDate") = "1621017052777" 'timestamp
    MyDict("_disssimplesearch_WAR_disssearchportlet_searchOccurred") = "true"
    MyDict("_disssimplesearch_WAR_disssearchportlet_sskeywordKey") = searchKeyword
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
    
    payload = ""
        
    For Each DictKey In MyDict
        payload = IIf(Len(DictKey) = 0, WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey)), _
                      payload & "&" & WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey)))
    Next DictKey
        
    With oHttp
        .Open "POST", url, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
        .send (payload)
        oHtml.body.innerHTML = .responseText
    End With
        
    Debug.Print oHtml.querySelector(".briefProfileLink").getAttribute("href")
    GetUrl = oHtml.querySelector(".briefProfileLink").getAttribute("href")
    
End Function