【问题标题】:Import web data with Excel VBA(使用 Excel VBA 导入 Web 数据)
【发布时间】:2020-08-30 10:38:38
【问题描述】:

我希望当我导入产品的网站 URL 时,它会在电子表格中显示产品的名称、描述、价格和图片 URL。

这是我所拥有的:(不是真正的网站)

' Opens the product page in a visible Internet Explorer window, waits until the
' page has finished loading, and grabs its HTML document for later parsing.
' The early-bound types used here need the "Microsoft Internet Controls" and
' "Microsoft HTML Object Library" references to be enabled.
Sub Trial()
    Dim ieObj As InternetExplorer
    Dim ht As HTMLDocument
    Dim Website As String   ' declared explicitly so the code compiles under Option Explicit

    Website = "https://www.amazon.com/resistencia-Avalon-cartas-empaque-original/dp/B009SAAV0C?pf_rd_r=WWESR922Z214Y10K3PHH&pf_rd_p=4dd821c0-e689-433a-a035-5e03461484eb&pd_rd_r=305599f9-5f3f-41c6-9a13-8daefd8d998c&pd_rd_w=qWHso&pd_rd_wg=BNzqC&ref_=pd_gw_unk"

    Set ieObj = New InternetExplorer
    ieObj.Visible = True
    ieObj.navigate Website

    ' Wait on both Busy and readyState: readyState alone can still report the
    ' previous state right after navigate is issued or during a redirect.
    Do While ieObj.Busy Or ieObj.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    Set ht = ieObj.document

End Sub

附加信息
产品名称:The Resistance:Avalon Social Deduction Game
id="productTitle" class="a-size-large product-title-word-break"

产品描述: The Resistance:Avalon 是一款可独立游玩的游戏,无需拥有 The Resistance 即可进行;两款游戏相互兼容,可以组合使用
适合 5 到 10 名玩家
需要 30 分钟的游戏时间
(全部在 class= "a-list-item" 但不同的部分)

价格:17.12 美元
id="priceblock_ourprice"
class="a-size-medium a-color-price priceBlockBuyingPriceString"

图片网址:https://images-na.ssl-images-amazon.com/images/I/91JhcC33dTL._AC_SY879_.jpg
img alt="抵抗:阿瓦隆社会演绎游戏"

【问题讨论】:

    标签: excel vba web-scraping


    【解决方案1】:

    您可以使用 XHR(XMLHTTP 请求)代替 IE 来获取上述字段。这样执行速度会快得多,也能为您节省大量时间。正则表达式仅用于从属性值中提取所需的图片链接。执行前,请确保已在 VBA 编辑器的“工具 → 引用”中勾选 Microsoft HTML Object Library。

    ' Downloads the product page over XMLHTTP and writes four fields to the
    ' worksheet that [A1]..[D1] resolve to:
    '   A1 = product title, B1 = feature-bullet description,
    '   C1 = price,         D1 = image URL.
    ' Needs the "Microsoft HTML Object Library" reference (early-bound HTMLDocument).
    ' A field whose element is not found on the page is written as an empty
    ' string instead of aborting the macro with a runtime error.
    ' Raises a runtime error (vbObjectError + 513) when the HTTP status is not 200.
    Sub GetContent()
        Const URL = "https://www.amazon.com/resistencia-Avalon-cartas-empaque-original/dp/B009SAAV0C?pf_rd_r=WWESR922Z214Y10K3PHH&pf_rd_p=4dd821c0-e689-433a-a035-5e03461484eb&pd_rd_r=305599f9-5f3f-41c6-9a13-8daefd8d998c&pd_rd_w=qWHso&pd_rd_wg=BNzqC&ref_=pd_gw_unk"
        Const BULLET_MARKER = "model number."
        Dim S$, sImage$, sBullets$
        Dim Matches As Object, imgNode As Object
        Dim doc As HTMLDocument
        Dim parts() As String
        Dim attr As Variant

        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", URL, False
            .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:79.0) Gecko/20100101 Firefox/79.0"
            .send
            ' Fail with a clear message rather than parsing an error/captcha page.
            If .Status <> 200 Then
                Err.Raise vbObjectError + 513, "GetContent", _
                          "Request failed: HTTP " & .Status & " " & .statusText
            End If
            S = .responseText
        End With

        Set doc = New HTMLDocument
        doc.body.innerHTML = S

        [A1] = GetContent_NodeText(doc, "h1#title > span#productTitle")

        ' Keep only the text after the marker when it is present; otherwise
        ' use the whole bullet list so Split(...)(1) cannot go out of range.
        sBullets = GetContent_NodeText(doc, "#feature-bullets > ul.a-unordered-list")
        parts = Split(sBullets, BULLET_MARKER)
        If UBound(parts) >= 1 Then
            [B1] = Trim(parts(1))
        Else
            [B1] = Trim(sBullets)
        End If

        [C1] = GetContent_NodeText(doc, "span[id='priceblock_ourprice']")

        ' getAttribute returns Null when the attribute is missing, which
        ' cannot be assigned to a String directly.
        Set imgNode = doc.querySelector("#imgTagWrapperId > img")
        If Not imgNode Is Nothing Then
            attr = imgNode.getAttribute("data-a-dynamic-image")
            If Not IsNull(attr) Then sImage = CStr(attr)
        End If

        With CreateObject("VBScript.RegExp")
            .Global = True
            .IgnoreCase = False
            .Pattern = """(.*?)"""
            .MultiLine = True
            Set Matches = .Execute(sImage)
            ' The original picks the third quoted string; fall back to the
            ' first one (or an empty cell) when fewer matches are available.
            If Matches.Count > 2 Then
                [D1] = Matches(2).SubMatches(0)
            ElseIf Matches.Count > 0 Then
                [D1] = Matches(0).SubMatches(0)
            Else
                [D1] = vbNullString
            End If
        End With
    End Sub

    ' Returns the innerText of the first element matching selector, or an
    ' empty string when no element matches.
    Private Function GetContent_NodeText(ByVal doc As HTMLDocument, ByVal selector As String) As String
        Dim node As Object
        Set node = doc.querySelector(selector)
        If Not node Is Nothing Then GetContent_NodeText = node.innerText
    End Function
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2012-11-27
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多