您可以使用多种方法，下面我将演示其中 3 种。
使用 CSS 选择器过滤:
您可以先简单地使用 attribute = value 形式的 CSS 选择器，并配合“以……开头”运算符 (^) 来定位感兴趣的类。我看不到 html 的其余部分，所以不确定这是否会匹配到额外的行。
' Collect every element whose class attribute begins with "Reports-Line".
Dim matchedRows As Object
Set matchedRows = ieDoc.querySelectorAll("[class^='Reports-Line']")
下面的示例从文件中读取您的 html（为便于解析稍作修改）。我筛选出感兴趣的行并把它们的 outerHTML 拼接起来，然后在两端加上 <table></table> 标签，再通过剪贴板将表格粘贴到工作表。
需要引用（VBE > 工具 > 引用）Microsoft HTML Object Library。
Option Explicit
Public Sub Test()
    ' Loads a saved HTML file, keeps only the elements whose class attribute
    ' starts with "Reports-Line", wraps their markup in a <table> element and
    ' pastes the result at A1 of the active sheet via the clipboard.
    Dim doc As HTMLDocument
    Dim dataObj As Object
    Dim matches As Object
    Dim tableMarkup As String
    Dim total As Long
    Dim n As Long

    Set doc = GetHTMLFileContent("C:\Users\User\Desktop\test.html")
    ' Late-bound clipboard data object created from its class id.
    Set dataObj = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    Set matches = doc.querySelectorAll("[class^='Reports-Line']")

    ' Concatenate the outerHTML of every matching element, in document order.
    total = matches.Length
    tableMarkup = "<table>"
    For n = 1 To total
        tableMarkup = tableMarkup & matches.item(n - 1).outerHTML
    Next n
    tableMarkup = tableMarkup & "</table>"

    With dataObj
        .SetText tableMarkup
        .PutInClipboard
    End With
    ActiveSheet.Range("A1").PasteSpecial
End Sub
Public Function GetHTMLFileContent(ByVal filePath As String) As HTMLDocument
    ' Reads the text file at filePath and returns a new HTMLDocument whose
    ' body.innerHTML is set to the file's contents.
    '
    ' Parameters:
    '   filePath - path of the HTML file to read.
    ' Returns:
    '   A new HTMLDocument; its body is assigned an empty string when the
    '   file is empty.
    ' Notes:
    '   Uses the early-bound HTMLDocument type, so the project needs a
    '   reference to the Microsoft HTML Object Library.
    Dim fso As Object, hFile As Object, hString As String, html As HTMLDocument
    Set html = New HTMLDocument
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set hFile = fso.OpenTextFile(filePath)
    ' ReadAll consumes the whole stream in one call, so no loop is needed;
    ' the AtEndOfStream check only protects against calling it on an empty file.
    If Not hFile.AtEndOfStream Then hString = hFile.ReadAll()
    ' Release the file handle as soon as the content is in memory.
    hFile.Close
    html.body.innerHTML = hString
    Set GetHTMLFileContent = html
End Function
HTML 示例(需要调整但足以用于演示):
<!-- Demo input: the "reports-heading" row is marked as unwanted; the two rows whose class starts with "Reports-Line" are the wanted ones. -->
<head></head>
<body>
<table id="mainContent" class="MainContent-Report" cellspacing="0" cellpadding="0" ... <div class="reports-area">
</table>
<table cellspacing="0" cellpadding="0" width="100%">
<tbody>
<tr class="reports-heading">
don't want
</tr>
<tr id="R211" class="Reports-LineOne">
do want 1
</tr>
<tr id="R212" class="Reports-LineTwo">
do want 2.
</tr>
</tbody>
</table>
</body>
</html>
使用 JavaScript 删除不需要的项：
以下显示使用 javascript 从表中删除第一行(标题行):
Option Explicit
Public Sub AlterTable()
    ' Opens the w3schools tables page in Internet Explorer, runs a small script
    ' inside the page to remove the first row of the #customers table, then
    ' copies the table's markup to the clipboard and pastes it at A1 of the
    ' active sheet.

    ' Script executed in the page: drops the first <tr> of #customers.
    ' in your case: "var child = document.querySelector('.reports-area table tr:nth-of-type(1)'); child.parentNode.removeChild(child);"
    Const REMOVE_FIRST_ROW_JS As String = "var child = document.querySelector('#customers tr:nth-of-type(1)'); child.parentNode.removeChild(child);"

    Dim browser As Object
    Dim dataObj As Object
    Dim customersTable As HTMLTable

    Set browser = CreateObject("InternetExplorer.Application")
    ' Late-bound clipboard data object created from its class id.
    Set dataObj = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")

    browser.Visible = True
    browser.Navigate2 "https://www.w3schools.com/html/html_tables.asp"
    ' Wait until the browser is idle and readyState is 4.
    Do While browser.Busy Or browser.readyState <> 4
        DoEvents
    Loop

    With browser.document
        .parentWindow.execScript REMOVE_FIRST_ROW_JS
        ' in your case use: .querySelector(".reports-area table")
        Set customersTable = .querySelector("#customers")
    End With

    dataObj.SetText customersTable.outerHTML
    dataObj.PutInClipboard
    ActiveSheet.Range("A1").PasteSpecial
    browser.Quit
End Sub
使用 HTMLDocument 方法删除不需要的项：
同样的事情,但没有 javascript,即直接使用 HTMLDocument 的方法
Option Explicit
Public Sub AlterTable()
    ' Opens the w3schools tables page in Internet Explorer, removes the first
    ' row of the #customers table using DOM methods (no script injection),
    ' then copies the table's markup to the clipboard and pastes it at A1 of
    ' the active sheet.
    Dim ie As Object, clipboard As Object, hTable As HTMLTable, headerRow As Object
    Set ie = CreateObject("InternetExplorer.Application")
    ' Late-bound clipboard data object created from its class id.
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    With ie
        .Visible = True
        .Navigate2 "https://www.w3schools.com/html/html_tables.asp"
        ' Wait until the browser is idle and readyState is 4.
        While .Busy Or .readyState <> 4: DoEvents: Wend
        With .document
            ' Look the row up once and reuse the reference for both the parent
            ' lookup and the removal.
            ' in your case use: .querySelector(".reports-area table tr:nth-of-type(1)")
            Set headerRow = .querySelector("#customers tr:nth-of-type(1)")
            ' Skip the removal when the selector matched nothing instead of
            ' raising "Object variable not set" with the browser still open.
            If Not headerRow Is Nothing Then headerRow.ParentNode.RemoveChild headerRow
            Set hTable = .querySelector("#customers") ' in your case use: .querySelector(".reports-area table")
        End With
        clipboard.SetText hTable.outerHTML
        clipboard.PutInClipboard
        ActiveSheet.Range("A1").PasteSpecial
        .Quit
    End With
End Sub