【问题标题】:VBA scrape Table every cellVBA刮表每个单元格
【发布时间】:2021-07-26 15:30:21
【问题描述】:

我正在尝试从需要登录、输入搜索选项的网站上抓取表格,甚至在表格显示之前。我设法做到了,但是一旦表格显示出来,我不知道如何将它放到我的工作表中。

我这里有表格 HTML 位置:

IE.document.querySelector("[id='advanced_iframe']").contentDocument.querySelector("table[id=GridView1]")

我可以通过以下方式从表格中获取文本:

IE.document.querySelector("[id='advanced_iframe']").contentDocument.querySelector("table[id=GridView1]").innerText

但我不知道如何将表格的每个单元格放入我的 excel 工作表(“Sheet1”)中的每个单元格中

来自网页的表格:

感谢任何帮助。

【问题讨论】:

    标签: html excel vba web-scraping


    【解决方案1】:

    我只能假设您正在使用 Windows 并尝试从 Excel 内部运行 VBA - 您没有另外说,所以这是最简单的解决方案,不涉及循环或表格格式代码的依赖关系

    您基本上可以使用 Excel 的内置 HTML 翻译工具和 Microsoft 的剪贴板将表格复制/粘贴到 Excel 中

    首先 - 将 Microsoft 的 Clipboard API functions 复制/粘贴到模块中

    Option Explicit
    Private Declare Function OpenClipboard Lib "user32.dll" (ByVal hWnd As Long) As Long
    Private Declare Function EmptyClipboard Lib "user32.dll" () As Long
    Private Declare Function CloseClipboard Lib "user32.dll" () As Long
    Private Declare Function IsClipboardFormatAvailable Lib "user32.dll" (ByVal wFormat As Long) As Long
    Private Declare Function GetClipboardData Lib "user32.dll" (ByVal wFormat As Long) As Long
    Private Declare Function SetClipboardData Lib "user32.dll" (ByVal wFormat As Long, ByVal hMem As Long) As Long
    Private Declare Function GlobalAlloc Lib "kernel32.dll" (ByVal wFlags As Long, ByVal dwBytes As Long) As Long
    Private Declare Function GlobalLock Lib "kernel32.dll" (ByVal hMem As Long) As Long
    Private Declare Function GlobalUnlock Lib "kernel32.dll" (ByVal hMem As Long) As Long
    Private Declare Function GlobalSize Lib "kernel32" (ByVal hMem As Long) As Long
    Private Declare Function lstrcpy Lib "kernel32.dll" Alias "lstrcpyW" (ByVal lpString1 As Long, ByVal lpString2 As Long) As Long
    
    Public Sub SetClipboard(sUniText As String)
        Dim iStrPtr As Long
        Dim iLen As Long
        Dim iLock As Long
        Const GMEM_MOVEABLE As Long = &H2
        Const GMEM_ZEROINIT As Long = &H40
        Const CF_UNICODETEXT As Long = &HD
        OpenClipboard 0&
        EmptyClipboard
        iLen = LenB(sUniText) + 2&
        iStrPtr = GlobalAlloc(GMEM_MOVEABLE Or GMEM_ZEROINIT, iLen)
        iLock = GlobalLock(iStrPtr)
        lstrcpy iLock, StrPtr(sUniText)
        GlobalUnlock iStrPtr
        SetClipboardData CF_UNICODETEXT, iStrPtr
        CloseClipboard
    End Sub
    
    Public Function GetClipboard() As String
        Dim iStrPtr As Long
        Dim iLen As Long
        Dim iLock As Long
        Dim sUniText As String
        Const CF_UNICODETEXT As Long = 13&
        OpenClipboard 0&
        If IsClipboardFormatAvailable(CF_UNICODETEXT) Then
            iStrPtr = GetClipboardData(CF_UNICODETEXT)
            If iStrPtr Then
                iLock = GlobalLock(iStrPtr)
                iLen = GlobalSize(iStrPtr)
                sUniText = String$(iLen \ 2& - 1&, vbNullChar)
                lstrcpy StrPtr(sUniText), iLock
                GlobalUnlock iStrPtr
            End If
            GetClipboard = sUniText
        End If
        CloseClipboard
    End Function
    

    那就换行

    IE.document.querySelector("[id='advanced_iframe']").contentDocument.querySelector("table[id=GridView1]").innerText
    

    将其分配给字符串变量使用 outerHTML 获取 TABLE 标记

    table_html = IE.document.querySelector("[id='advanced_iframe']").contentDocument.querySelector("table[id=GridView1]").outerHTML
    

    然后将 table_html 复制到剪贴板,然后粘贴到表格的起始单元格中

    SetClipboard table_html
    
    Worksheets("Sheet1").Activate
    Range("A1").Select
    ActiveSheet.PasteSpecial Format:="Unicode Text", Link:=False, DisplayAsIcon:=False
    

    这是一个经过测试的工作示例:

    Public Sub TestHTMLPaste()
    
    On Error GoTo Err_TestHTMLPaste
        Const SiteURL As String = "https://www.grapecity.com/controls/activereports/download-version-history"
        
        Dim IE          As Object
        Dim BodyHTML    As String
        Dim FieldStart  As Integer
        Dim FieldEnd    As Integer
        
        Dim TableHTML   As String
        
        Set IE = CreateObject("InternetExplorer.Application")
        
        With IE
            .Navigate SiteURL
            Do While .Busy Or .ReadyState <> 4
                DoEvents
            Loop
        
            BodyHTML = .document.body.innerhtml
            
            Debug.Print BodyHTML
            
            If InStr(BodyHTML, "<table class=""gctable"">") > 0 Then
                Debug.Print "Found it"
                
                TableHTML = .document.querySelector("table[class=gctable]").outerHTML
                
                SetClipboard TableHTML
                DoEvents
                
            End If
            
            .Quit
            
        End With
        
        DoEvents
        
        If TableHTML <> "" Then
            Worksheets("Sheet1").Activate
            Range("A1").Select
            ActiveSheet.PasteSpecial Format:="Unicode Text", Link:=False, DisplayAsIcon:=False
            DoEvents
            Range("A1").Select
        Else
            MsgBox " No Table HTML found"
        End If
        
        
    Err_TestHTMLPaste:
        Set IE = Nothing
    
    End Sub
    

    【讨论】:

    • 代码通过,但似乎没有发生任何事情。没有错误,也没有单元格值。我已经将微软的剪贴板 API 函数放入模块中,修改了 tableHTML 代码并调用了剪贴板函数。我错过了什么吗?
    • 发布你的“改编代码”——你尝试过什么样的调试?基本 DEBUG.PRINT table_html 的结果是什么
    • 改编代码 --> table_html = IE.document.querySelector("[id='advanced_iframe']").contentDocument.querySelector("table[id=GridView1]").innerHTML SetClipboard table_html Sheets ("Table_Extract").Range("A1").PasteSpecial Format:="HTML", link:=False, DisplayAsIcon:=False
    • 你可以简单地用Dim clipboard As Object: Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")然后clipboard.SetText HtmlTable.outerHTML创建一个剪贴板对象的实例
    • ^^ 同意。一点都不直观。但是添加到集合中的有用工具:-)
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2017-08-08
    • 2021-09-01
    • 1970-01-01
    相关资源
    最近更新 更多