【问题标题】:Return URL From First Search Result从第一个搜索结果返回 URL
【发布时间】:2020-06-24 10:35:42
【问题描述】:

我有一个包含大约 25,000 个公司关键字的 Excel 工作簿,我想从中获取公司网站 URL。

我希望运行一个 VBA 脚本,它可以将这些关键字作为 Google 搜索运行,并将第一个结果的 URL 提取到电子表格中。

我找到了similar thread
这样的结果是一败涂地;一些关键字在下一列中返回 URL,其他关键字保持空白。
它似乎还拉取了第一个搜索结果中谷歌优化子链接的 URL,而不是主网站 URL:Google Search Result example

然后我在包含 1,000 个关键字的示例列表中找到了以下代码 here。此博客的作者规定此代码适用于 Mozilla Firefox。

我测试了他也编写的 IE 代码,但这并没有达到相同的结果(它添加了由搜索结果中的描述性文本组成的超链接,而不是原始 URL)。

Firefox 代码一直工作到第 714 行,然后返回错误消息

运行时错误 91:对象变量或未设置块变量

电子表格布局显示成功的结果和宏停止的行

Sub GoogleURL ()

    Dim url As String, lastRow As Long
    Dim XMLHTTP As Object
    Dim html As Object
    Dim objResultDiv As Object
    Dim objH As Object

    lastRow = Range(“A” & Rows.Count).End(xlUp).Row

    For i = 2 To lastRow

        url = “https://www.google.co.uk/search?q=” & Cells(i, 1) & “&rnd=” & WorksheetFunction.RandBetween(1, 10000)

        Set XMLHTTP = CreateObject(“MSXML2.serverXMLHTTP”)

        XMLHTTP.Open “GET”, url, False

        XMLHTTP.setRequestHeader “Content-Type”, “text/xml”

        XMLHTTP.setRequestHeader “User-Agent”, “Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0”

        XMLHTTP.send

        Set html = CreateObject(“htmlfile”)

        html.body.innerHTML = XMLHTTP.ResponseText

        Set objResultDiv = html.getelementbyid(“rso”)

        Set objH = objResultDiv.getelementsbytagname(“h3”)(0)

        Cells(i, 2).Value = objH.innerText

        Set html = CreateObject(“htmlfile”)

        html.body.innerHTML = XMLHTTP.ResponseText

        Set objResultDiv = html.getelementbyid(“rso”)

        Set objH = objResultDiv.getelementsbytagname(“cite”)(0)

        Cells(i, 3).Value = objH.innerText

        DoEvents

    Next

End Sub

【问题讨论】:

  • 这个错误是否有可能源于 Firefox 限制宏能够运行的搜索次数?
  • 如果您认为我的建议可以作为这个问题的答案,那么我建议您接受它作为答案。它可以在未来帮助其他社区成员解决类似的问题。感谢您的理解。

标签: html excel vba internet-explorer firefox


【解决方案1】:

由于Firefox是微软支持范围内的第三方浏览器,我可以帮你检查一下IE浏览器的VBA代码。

您说this link中给出的IE浏览器的VBA代码生成带有链接的描述,您的要求是将描述和链接存储在单独的列中。

我尝试根据您的要求修改该示例代码。

这是该示例中修改后的代码。

Option Explicit
Const TargetItemsQty = 1 ' results for each keyword

Sub GWebSearchIECtl()

    Dim objSheet As Worksheet
    Dim objIE As Object
    Dim x As Long
    Dim y As Long
    Dim strSearch As String
    Dim lngFound As Long
    Dim st As String
    Dim colGItems As Object
    Dim varGItem As Variant
    Dim strHLink As String
    Dim strDescr As String
    Dim strNextURL As String

    Set objSheet = Sheets("Sheet1")
    Set objIE = CreateObject("InternetExplorer.Application")
    objIE.Visible = True ' for debug or captcha request cases
    y = 1 ' start searching for the keyword in the first row
    With objSheet
        .Select
        .Range(.Columns("B:B"), .Columns("B:B").End(xlToRight)).Delete ' clear previous results
        .Range(.Columns("C:C"), .Columns("C:C").End(xlToRight)).Delete ' clear previous results
        .Range("A1").Select
        Do Until .Cells(y, 1) = ""
            x = 2 ' start writing results from column B
            .Cells(y, 1).Select
            strSearch = .Cells(y, 1) ' current keyword
            With objIE
                lngFound = 0
                .navigate "https://www.google.com/search?q=" & EncodeUriComponent(strSearch) ' go to first search results page
                Do
                    Do While .Busy Or Not .READYSTATE = 4: DoEvents: Loop ' wait IE
                    Do Until .document.READYSTATE = "complete": DoEvents: Loop ' wait document
                    Do While TypeName(.document.getelementbyid("res")) = "Null": DoEvents: Loop ' wait [#res] element
                    Set colGItems = .document.getelementbyid("res").getElementsByClassName("g") ' collection of search result [.g] items
                    For Each varGItem In colGItems ' process each item in collection
                        If varGItem.getelementsbytagname("a").Length > 0 And varGItem.getElementsByClassName("st").Length > 0 Then ' must have hyperlink and description
                            strHLink = varGItem.getelementsbytagname("a")(0).href ' get first hyperlink [a] found in current item
                            strDescr = GetInnerText(varGItem.getElementsByClassName("st")(0).innerHTML) ' get first description [span.st] found in current item
                            lngFound = lngFound + 1
                            'Debug.Print (strHLink)
                            'Debug.Print (strDescr)
                            With objSheet ' put result into cell
                                 .Cells(y, x).Value = strDescr
                                 .Hyperlinks.Add .Cells(y, x + 1), strHLink
                                .Cells(y, x).WrapText = True
                                x = x + 1 ' next column
                            End With
                            If lngFound = TargetItemsQty Then Exit Do ' continue with next keyword - necessary quantity of the results for current keyword found
                        End If
                        DoEvents
                    Next
                    If TypeName(.document.getelementbyid("pnnext")) = "Null" Then Exit Do ' continue with next keyword - no [a#pnnext.pn] next page button exists
                    strNextURL = .document.getelementbyid("pnnext").href ' get next page url
                    .navigate strNextURL ' go to next search results page
                Loop
            End With
            y = y + 1 ' next row
        Loop
    End With
    objIE.Quit

    ' google web search page contains the elements:
    ' [div#res] - main search results block
    ' [div.g] - each result item block within [div#res]
    ' [a] - hyperlink ancor(s) within each [div.g]
    ' [span.st] - description(s) within each [div.g]
    ' [a#pnnext.pn] - hyperlink ancor to the next search results page

End Sub

Function EncodeUriComponent(strText As String) As String
    Static objHtmlfile As Object

    If objHtmlfile Is Nothing Then
        Set objHtmlfile = CreateObject("htmlfile")
        objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
    End If
    EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function

Function GetInnerText(strText As String) As String
    Static objHtmlfile As Object

    If objHtmlfile Is Nothing Then
        Set objHtmlfile = CreateObject("htmlfile")
        objHtmlfile.Open
        objHtmlfile.Write "<body></body>"
    End If
    objHtmlfile.body.innerHTML = strText
    GetInnerText = objHtmlfile.body.innerText
End Function

在 IE 11 浏览器中的输出:

您可以尝试在自己身边运行它以查看大量数据的结果。

如果您遇到任何性能问题,那么我建议您尝试使用较少的数据量。

【讨论】: