您不需要自动化浏览器。如果您在选择日期时检查网络流量,您将看到 XHR 请求信息。您可以使用这些详细信息(实际上我将其缩短为所需的 url 参数)来检索页面内容。
信息包含在table 标签元素中。 champion 位于类名称为 blockBar 的表中,否则该信息用于页面上所见的行信息。为了利用 querySelector(这是HTMLDocument 的一种方法)来按类名为各个表选择子表级元素,我将各个表 html 粘贴到 surrogate html 文档变量中;然后我可以再次访问 querySelector,因此可以编写很好的灵活/描述性 css selectors 来匹配元素。
输出中的列在 XHR 响应中都有很好的描述性类名称,因此您可以使用这些名称来确定要写入哪一列。由于分数信息可能会丢失输出格式,因此我使用 Select Case 语句来测试这些 css 选择器,并附加一个单引号以保留输出格式。
为了提高效率,我选择将所有结果存储在一个数组中并一次性写出。
Option Explicit
Public Sub GetMatchInfo()
Dim headers(), results(), r As Long, c As Long, ws As Worksheet, i As Long
Dim champion As String, html As HTMLDocument, html2 As HTMLDocument, cssSelectors(), j As Long
Set html = New HTMLDocument
Set html2 = New HTMLDocument
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("Date", "Time", "Status", "Champion", "Home Team", "Full Time Score", "Away Team", "Half Time", "Penalties Score")
cssSelectors = Array(".kick_t_dt", ".kick_t_ko", ".status", "champion", ".home", ".score_link", ".away", ".halftime", ".after_pen")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.scorespro.com/soccer/ajax-calendar.php?mode=results&date=2019-08-07", False
.send
html.body.innerHTML = .responseText
End With
Dim tables As Object, selector As String
Set tables = html.querySelectorAll("table")
ReDim results(1 To tables.Length, 1 To UBound(headers) + 1)
For i = 0 To tables.Length - 1
If tables.item(i).className = "blockBar" Then
champion = tables.item(i).innerText
Else
r = r + 1
html2.body.innerHTML = tables.item(i).outerHTML
On Error Resume Next
For j = LBound(cssSelectors) To UBound(cssSelectors)
selector = cssSelectors(j)
Select Case selector
Case ".score_link", ".halftime", ".after_pen"
results(r, j + 1) = "'" & html2.querySelector(cssSelectors(j)).innerText
Case "champion"
results(r, j + 1) = champion
Case Else
results(r, j + 1) = html2.querySelector(cssSelectors(j)).innerText
End Select
Next
On Error GoTo 0
End If
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
示例输出:
使用 IE
Option Explicit
Public Sub GetMatchInfo()
Dim headers(), results(), r As Long, c As Long, ws As Worksheet, i As Long
Dim champion As String, html As HTMLDocument, html2 As HTMLDocument, cssSelectors(), j As Long
Set html = New HTMLDocument
Set html2 = New HTMLDocument
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("Date", "Time", "Status", "Champion", "Home Team", "Full Time Score", "Away Team", "Half Time", "Penalties Score")
cssSelectors = Array(".kick_t_dt", ".kick_t_ko", ".status", "champion", ".home", ".score_link", ".away", ".halftime", ".after_pen")
With CreateObject("InternetExplorer.Application")
.Navigate2 "https://www.scorespro.com/soccer/results/"
While .Busy Or .readyState <> 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 2)
html.body.innerHTML = .document.body.innerHTML
.Quit
End With
Dim tables As Object, selector As String
Set tables = html.querySelectorAll("table")
ReDim results(1 To tables.Length, 1 To UBound(headers) + 1)
For i = 0 To tables.Length - 1
If tables.item(i).className = "blockBar" Then
champion = tables.item(i).innerText
Else
r = r + 1
html2.body.innerHTML = tables.item(i).outerHTML
On Error Resume Next
For j = LBound(cssSelectors) To UBound(cssSelectors)
selector = cssSelectors(j)
Select Case selector
Case ".score_link", ".halftime", ".after_pen"
results(r, j + 1) = "'" & html2.querySelector(cssSelectors(j)).innerText
Case "champion"
results(r, j + 1) = champion
Case Else
results(r, j + 1) = html2.querySelector(cssSelectors(j)).innerText
End Select
Next
On Error GoTo 0
End If
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub