【问题标题】:Select attribute value via VBA & XMLHttpRequest(通过 VBA 和 XMLHttpRequest 选择属性值)
【发布时间】:2020-04-11 08:29:24
【问题描述】:

我的问题与其他问题 VBA - Select HTML item using VBA 有关。如何在没有 Internet Explorer 的情况下使用 XMLHttpRequest 选择并单击 data-id 属性值? 到目前为止,我的代码如下所示:

Sub Data_multi()
    ' Downloads each listing page of the category and writes one row per product
    ' to the active sheet: product name in column 1, price in column 2.
    '
    ' Requires project references to "Microsoft XML, v6.0" (XMLHTTP60) and
    ' "Microsoft HTML Object Library" (HTMLDocument).
    Const LAST_PAGE As Long = 4
    Const LIST_URL As String = "https://www.castorama.ru/building-materials/building-dry-materials-and-primers?limit=96&p="

    Dim http As New XMLHTTP60, html As New HTMLDocument
    Dim topic As HTMLHtmlElement
    Dim i As Long
    Dim x As Long   ' last worksheet row written; was previously an undeclared Variant

    ' Loop-invariant, so it is set once here instead of on every page.
    Application.ScreenUpdating = False
    On Error GoTo CleanFail

    For i = 1 To LAST_PAGE
        With http
            .Open "GET", LIST_URL & i, False
            .setRequestHeader "User-Agent", "Mozilla/5.0"
            ' A date far in the past is sent so that a cached copy is not reused.
            .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
            ' The request was opened synchronously (async = False), so .send only
            ' returns once the response is complete; no readyState polling is needed.
            .send
            html.body.innerHTML = .responseText
        End With

        For Each topic In html.getElementsByClassName("product-info")
            With topic.getElementsByClassName("product-name")
                If .Length Then
                    x = x + 1
                    Cells(x, 1) = .item(0).innerText

                    ' The price is written only for a product that produced a row.
                    ' Otherwise it would land on the previous product's row, or on
                    ' row 0 (run-time error 1004) when the first item has no name.
                    With topic.getElementsByClassName("price")
                        If .Length Then Cells(x, 2) = .item(0).innerText
                    End With
                End If
            End With
        Next topic
    Next i

    Application.ScreenUpdating = True
    Exit Sub

CleanFail:
    ' Restore screen updating before passing the original error on to the caller.
    Application.ScreenUpdating = True
    Err.Raise Err.Number, Err.Source, Err.Description
End Sub

提前致谢

【问题讨论】:

  • 点击不会对 xmlhttp 做任何事情。你追求的最终结果是什么?您希望获得特定商店的结果吗?
  • 非常感谢您的耐心等待,QHarr。是的,我希望抓取 id=48 的商店的数据,但当前代码只能抓取默认商店的结果(“Москва, Новорязанское ш.”)。

标签: excel vba web-scraping xmlhttprequest


【解决方案1】:

您需要使用商店的 data-id 向服务器发出 POST 请求,然后获取相应的 cookie,并在后续对原始 URL 的 GET 请求中传递该 cookie。如果商店切换成功,具有该 data-id 的元素的类名应会更新为包含 active 类。您可以将 data-id 值提取到一个变量中,以便更容易用于其他商店。

Option Explicit

Public Sub SetStore()
    ' Switches the site to a specific shop and then loads the category page for it.
    '
    ' Steps:
    '   1. POST to the shop-switch endpoint for SHOP_ID.
    '   2. Read the "castorama_current_shop" cookie from the response headers.
    '   3. GET the category page, sending that cookie back to the server.
    '   4. Show the class name of the shop element carrying data-id = SHOP_ID.
    '
    ' Requires a project reference to "Microsoft HTML Object Library" and the
    ' GetCookie function defined in this module.
    Const SHOP_ID As String = "48"   ' change this single value to target another shop
    Const BASE_URL As String = "https://www.castorama.ru"

    Dim html As MSHTML.HTMLDocument, re As Object, xhr As Object
    Dim cookie As String
    Dim attrPos As Long
    Dim shopNode As Object

    Set html = New MSHTML.HTMLDocument
    Set re = CreateObject("VBScript.RegExp")
    Set xhr = CreateObject("MSXML2.ServerXMLHTTP")

    With re
        .Global = True
        .MultiLine = True
    End With

    With xhr
        .Open "POST", BASE_URL & "/multishop/switch/ajax/shop_id/" & SHOP_ID & "/", False
        .setRequestHeader "X-Requested-With", "XMLHttpRequest"
        .send

        ' Example response header line:
        ' Set-Cookie: castorama_current_shop=48; expires=Fri, 17-Jan-2020 22:14:51 GMT; Max-Age=2592000; path=/; HttpOnly
        cookie = GetCookie(re, .getAllResponseHeaders, "Set-Cookie: (castorama_current_shop=" & SHOP_ID & ".*)")

        ' Only the name=value pair belongs in a request; drop the attributes
        ' (expires, path, HttpOnly, ...) and the line-ending CR that ".*" captures.
        attrPos = InStr(cookie, ";")
        If attrPos > 0 Then cookie = Left$(cookie, attrPos - 1)
        cookie = Trim$(Replace(cookie, vbCr, vbNullString))

        If Len(cookie) = 0 Then
            MsgBox "No castorama_current_shop cookie was returned for shop " & SHOP_ID
            Exit Sub
        End If

        .Open "GET", BASE_URL & "/building-materials/building-dry-materials-and-primers?limit=96", False
        ' "Cookie" is the request header; "Set-Cookie" is only valid in responses.
        .setRequestHeader "Cookie", cookie
        .send
        html.body.innerHTML = .responseText
    End With

    ' If the switch worked, the class name of this element is expected to include "active".
    Set shopNode = html.querySelector(".shop__name[data-id='" & SHOP_ID & "']")
    If shopNode Is Nothing Then
        MsgBox "No .shop__name element with data-id='" & SHOP_ID & "' was found"
    Else
        MsgBox shopNode.className
    End If

End Sub

Public Function GetCookie(ByVal re As Object, ByVal s As String, ByVal p As String) As String
    ' Returns the first capture group of the first match of pattern p in s.
    '
    ' re : a VBScript.RegExp instance; its Pattern property is overwritten with p.
    ' s  : the text to search (for example the raw response headers).
    ' p  : a regular expression containing at least one capture group.
    '
    ' Returns an empty string when p does not match s or has no capture group,
    ' instead of raising a run-time error on the empty match collection.
    Dim matches As Object

    re.Pattern = p
    Set matches = re.Execute(s)

    If matches.Count = 0 Then Exit Function
    If matches(0).SubMatches.Count = 0 Then Exit Function

    GetCookie = matches(0).SubMatches(0)
End Function

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2019-07-20
    • 2012-11-07
    • 1970-01-01
    • 1970-01-01
    • 2017-11-07
    • 1970-01-01
    • 2011-11-05
    相关资源
    最近更新 更多