【发布时间】:2017-05-29 09:20:36
【问题描述】:
我正在尝试声明一个节点数组(这不是问题),然后在数组的每个元素中抓取两个子节点的 innerHTML - 以 SE 为例(使用 @987654322 @object 方法),假设我试图在主页上抓取标题和提取问题,有一个节点数组(类名:“question-summary”)。
然后有两个子节点(磁贴 - 类名:“question-hyperlink”和提取物 - 类名:“excerpt”)我的代码使用如下:
Sub Scraper()
Dim ie As Object
Dim doc As Object, oQuestionShells As Object, oQuestionTitle As Object, oQuestion As Object, oElement As Object
Dim QuestionShell As String, QuestionTitle As String, Question As String, sURL As String
Set ie = CreateObject("internetexplorer.application")
sURL = "https://stackoverflow.com/questions/tagged/excel-formula"
QuestionShell = "question-summary"
QuestionTitle = "question-hyperlink"
Question = "excerpt"
With ie
.Visible = False
.Navigate sURL
End With
Set doc = ie.Document 'Stepping through so doc is getting assigned (READY_STATE = 4)
Set oQuestionShells = doc.getElementsByClassName(QuestionShell)
For Each oElement In oQuestionShells
Set oQuestionTitle = oElement.getElementByClassName(QuestionTitle) 'Assigning this object causes an "Object doesn't support this property or method"
Set oQuestion = oElement.getElementByClassName(Question) 'Assigning this object causes an "Object doesn't support this property or method"
Debug.Print oQuestionTitle.innerHTML
Debug.Print oQuestion.innerHTML
Next
End Sub
【问题讨论】:
标签: html vba excel web-scraping