【发布时间】:2021-11-22 19:25:00
【问题描述】:
通过我在论坛中发布的其他问题的帮助,我可以从 finviz 筛选器的 3 个选项卡(概述、估值和财务)中提取所有信息。但是,对于提取的所有信息,我似乎无法在第一行填充标题(股票代码、每股收益、市盈率、市值等)。有什么建议吗?
Public Sub Initial()
FetchTabularData "https://finviz.com/screener.ashx?v=111", 1, 11, 0
FetchTabularData "https://finviz.com/screener.ashx?v=121", 13, 10, 3
FetchTabularData "https://finviz.com/screener.ashx?v=161", 23, 10, 3
End Sub
Public Sub FetchTabularData(ByVal Url As String, ByVal StartColumn As Long, AmountOfColumns As Long, ByVal StartChildren As Long)
Const base$ = "https://finviz.com/"
Dim elem As Object, S$, R&, oPage As Object, nextPage$
Dim Http As Object, Html As Object, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Data")
Set Http = CreateObject("MSXML2.XMLHTTP")
Set Html = CreateObject("HTMLFile")
R = 1
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Do While Url <> vbNullString
DoEvents
With Http
.Open "GET", Url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
.send
S = .responseText
End With
With Html
.body.innerHTML = S
For Each elem In .getElementById("screener-content").getElementsByTagName("tr")
If InStr(elem.className, "table-dark-row-cp") > 0 Or InStr(elem.className, "table-light-row-cp") > 0 Then
R = R + 1
' prepare an temporary array to write the row data into
Dim TempRow() As Variant
ReDim TempRow(1 To 1, 1 To AmountOfColumns) As Variant
Dim i As Long
For i = 0 To AmountOfColumns - 1
' instead of writing to the cells directly
' ws.Cells(R, StartColumn + i) = elem.Children(StartChildren + i).innerText
' we write into the temp array
TempRow(1, i + 1) = elem.Children(StartChildren + i).innerText
Next i
' and write the entire row at once (10 times faster than writing each cell)
ws.Cells(R, StartColumn).Resize(ColumnSize:=AmountOfColumns).Value = TempRow
End If
Next elem
Url = vbNullString
For Each oPage In .getElementsByTagName("a")
If InStr(oPage.className, "tab-link") And InStr(oPage.innerText, "next") > 0 Then
nextPage = oPage.getAttribute("href")
Url = base & Replace(nextPage, "about:", "")
End If
Next oPage
End With
Loop
Application.Calculation = xlCalculationAutomatic
Application.ScreenUpdating = True
End Sub
【问题讨论】:
-
标题行使用
table-top类名,这将在此语句中失败:If InStr(elem.className, "table-dark-row-cp") > 0 Or InStr(elem.className, "table-light-row-cp") > 0 Then,尝试添加另一个InStr检查table-top在If-Then语句中。 -
即使在添加 [InStr(elem.className, "table-top") > 0] 后似乎也不起作用
-
抱歉,该类位于
td标签中,而不是您正在循环的tr标签中。 @Mafoola -
1) 将
For Each elem In .getElementById("screener-content").getElementsByTagName("tr")更改为For Each elem In .getElementById("screener-content").getElementsByTagName("table")(3).getElementsByTagName("tr"),这样您就可以直接循环表中的tr元素。 2)删除If InStr(elem.className, "table-dark-row-cp") > 0 Or InStr(elem.className, "table-light-row-cp") > 0 Then和End If语句,因为您不再需要它(您已经在循环预期表的tr本身)@Mafoola -
嗨,Raymond,感谢您提供的提示,正在挑选顶行。但是,每次代码进入新页面时,都会再次选择标题行
标签: excel vba web-scraping