【问题标题】:Excel VBA IE versus XMLHTTP discrepancies(Excel VBA 中 IE 与 XMLHTTP 的差异)
【发布时间】:2020-11-02 03:55:44
【问题描述】:

我正在抓取以下网址: https://www2.asx.com.au/markets/trade-our-cash-market/overview/indices/real-time-indices

检索澳大利亚股票市场的指数列表。

我正在使用以下代码,它可以工作并返回标题和表格数据。

' Loads the ASX real-time indices page in Internet Explorer and writes the
' first <table> found inside the element with id "realTimeIndicesWidget"
' to a new worksheet via WriteTableToWorksheet.
Sub GetIEAsx()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLDiv As MSHTML.IHTMLElement
Dim HTMLTable As MSHTML.IHTMLElement
Dim url As String   ' declared explicitly so the module compiles under Option Explicit

url = "https://www2.asx.com.au/markets/trade-our-cash-market/overview/indices/real-time-indices"

IE.Navigate url

' Wait while IE is loading: keep looping as long as the browser is busy
' OR the document has not reached ReadyState 4 (complete). Using And here
' would end the wait as soon as either condition cleared.
  Do While IE.Busy Or IE.ReadyState <> 4
      DoEvents
      Application.Wait DateAdd("s", 1, Now)
  Loop

Set HTMLDoc = IE.document

Set HTMLDiv = HTMLDoc.getElementById("realTimeIndicesWidget")
If HTMLDiv Is Nothing Then
    MsgBox "Element 'realTimeIndicesWidget' was not found in the page."
    GoTo CleanUp
End If

Set HTMLTable = HTMLDiv.getElementsByTagName("table")(0)
If HTMLTable Is Nothing Then
    MsgBox "No table was found inside 'realTimeIndicesWidget'."
    GoTo CleanUp
End If

' The table must be copied before IE is closed, because the element
' belongs to the document held by the browser instance.
WriteTableToWorksheet HTMLTable

CleanUp:
' Close the browser instance created above so it is not left running.
IE.Quit
Set IE = Nothing
End Sub


' Copies the text of every cell of an HTML table into a newly added worksheet
' of ThisWorkbook.
'
' TableToProcess: the table element whose child sections are walked; each
'                 child of a section is treated as a row and each child of a
'                 row as a cell.
'
' Worksheet rows are numbered continuously across all sections; the column
' counter restarts at 1 for every row.
Public Sub WriteTableToWorksheet(TableToProcess As MSHTML.IHTMLElement)
    Dim tblSection As MSHTML.IHTMLElement
    Dim tblRow As MSHTML.IHTMLElement
    Dim tblCell As MSHTML.IHTMLElement
    Dim targetSheet As Worksheet
    Dim r As Long
    Dim c As Long

    Set targetSheet = ThisWorkbook.Worksheets.Add

    ' Walk section -> row -> cell and mirror that layout on the sheet.
    For Each tblSection In TableToProcess.Children
        For Each tblRow In tblSection.Children
            r = r + 1
            c = 0   ' start each row at the first column

            For Each tblCell In tblRow.Children
                c = c + 1
                targetSheet.Cells(r, c).Value = tblCell.innerText
            Next tblCell
        Next tblRow
    Next tblSection
End Sub

但是当我改用 XMLHTTP 来抓取该网站时,我只能得到表头 (thead) 数据,而得不到表体 (tbody) 数据。任何帮助将不胜感激。

' Downloads the ASX real-time indices page with a synchronous XMLHTTP GET,
' parses the response text into an in-memory HTML document and writes the
' first <table> inside the element with id "realTimeIndicesWidget" to a new
' worksheet via WriteTableToWorksheet.
Sub GetXmlAsx()
Dim XMLRequest As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim HTMLDiv As MSHTML.IHTMLElement
Dim HTMLTable As MSHTML.IHTMLElement
Dim url As String   ' declared explicitly so the module compiles under Option Explicit

url = "https://www2.asx.com.au/markets/trade-our-cash-market/overview/indices/real-time-indices"

With XMLRequest
    .Open "GET", url, False   ' False = synchronous: .send returns after the response arrives
    .send
End With

If XMLRequest.Status <> 200 Then
    MsgBox XMLRequest.Status & " - " & XMLRequest.statusText
    Exit Sub
End If

' Only the markup returned by the server is parsed here.
HTMLDoc.body.innerHTML = XMLRequest.responseText

Set HTMLDiv = HTMLDoc.getElementById("realTimeIndicesWidget")
If HTMLDiv Is Nothing Then
    MsgBox "Element 'realTimeIndicesWidget' was not found in the response."
    Exit Sub
End If

Set HTMLTable = HTMLDiv.getElementsByTagName("table")(0)
If HTMLTable Is Nothing Then
    MsgBox "No table was found inside 'realTimeIndicesWidget'."
    Exit Sub
End If

WriteTableToWorksheet HTMLTable

End Sub

【问题讨论】:

    标签: excel xml vba


    【解决方案1】:

    您的问题与 Excel 无关。

    只有 Internet Explorer 支持所有基于用户的 Internet 交互。

    XMLHTTP 仅支持从友好的 Web 服务器获取数据所需的功能。它不是用来浏览网页的。

    【讨论】:

      【解决方案2】:

      用 xhr 获取 HTML 时,tbody 中的值不会随之加载,因为这些值是页面加载后由脚本动态填充的。但是你可以使用 xhr 从以下链接获取包含这些值的 JSON:
      https://www.asx.com.au/asx/1/index-info?callback=processASXIndices

      ' Sends a synchronous GET request to the ASX index-info endpoint and
      ' shows the raw response text in a message box. If the HTTP status is
      ' not 200, the status code and status text are shown instead.
      Sub GetXmlAsx()
          Const INDEX_INFO_URL As String = "https://www.asx.com.au/asx/1/index-info?callback=processASXIndices"
          Dim httpReq As New MSXML2.XMLHTTP60

          httpReq.Open "GET", INDEX_INFO_URL, False
          httpReq.send

          If httpReq.Status = 200 Then
              MsgBox httpReq.responseText
          Else
              MsgBox httpReq.Status & " - " & httpReq.statusText
          End If
      End Sub
      

      要处理 JSON,您可以使用 Tim Hall 在 GitHub 上提供的这个 VBA 模块:
      https://github.com/VBA-tools/VBA-JSON

      【讨论】:

      • 感谢 Zwenn,我确实在寻找 xhr 链接,但错过了。
      猜你喜欢
      • 2018-11-05
      • 2014-07-19
      • 2017-04-22
      • 1970-01-01
      • 2021-12-20
      • 1970-01-01
      • 1970-01-01
      • 2015-10-17
      • 1970-01-01
      相关资源
      最近更新 更多