【问题标题】:How can I pull data from website table using vba如何使用 vba 从网站表中提取数据
【发布时间】:2021-10-29 21:38:59
【问题描述】:

我在编写将从网页中提取数据的代码时遇到问题。 代码的第一部分运行良好,但第二部分我无法正确编写以从站点的表中提取数据。 问题是“td”标签,我需要“td”标签中包含的数据,全部或部分。 我尝试了很多方法,但都没有成功。 任何人都可以帮助我从表中提取这些数据的代码吗?

这是我的代码:

Sub provera_TR_klijenta()

    'check in References: _
    Microsoft Internet Controls _
    Microsoft HTML Object Library
    
    Dim objIE As InternetExplorer 'special object variable representing the IE browser
    Dim aEle As HTMLLinkElement 'special object variable for an <a> (link) element
    
    Dim ieDoc As MSHTML.HTMLDocument
    Dim iframeDoc As MSHTML.HTMLDocument
    
    'initiating a new instance of Internet Explorer and asigning it to objIE
    Set objIE = New InternetExplorer
    
    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True
    objIE.Width = 1000
    objIE.Height = 800
    
    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://nbs.rs/en/drugi-nivo-navigacije/servisi/jedinstveni-registar-racuna/index.html"
    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    
    Set ieDoc = objIE.document
    Set iframeDoc = ieDoc.frames(0).document
    
    iframeDoc.getElementsByName("matbr")(1).Value = "21122017"
    iframeDoc.getElementsByName("Submit")(0).Click
    
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    
    '*** I don't know what to do from here ***

End Sub

html 代码在这个链接上: https://codebeautify.org/alleditor/cbf73981

【问题讨论】:

  • “我需要包含在“td”标签中的数据,全部或部分” - 那么你想要从结果表中获取哪些数据的逻辑是什么?结果表的 id 为 result,因此您几乎没有选择 1) 如果您知道 XPath 来获取 tr 元素,请使用 querySelectorAll,然后遍历集合并提取您想要的内容;或 2) 更长的方法,但设置一个变量,如 Set tblElement = ieDoc.getElementById("result") 然后 Set tblRows = tblElement.getElementsByTagName("tr") 以获取行元素的集合,然后执行循环并从中提取您想要的内容。
  • 您好,请不要将您的代码发布为图像,而是以文本的形式发布。 Here is why
  • 您好,感谢您的回答。代码太长所以没挂。我在此链接上保存 html 代码:(codebeautify.org/alleditor/cbf73981)
  • navigate IE to this web page (a pretty neat search engine really) - ROFL。可以分享一下表格的html吗?
  • 嗨@QHarr。 html代码很大,所以我把它保存在这个链接上:codebeautify.org/alleditor/cbcbc6ae

标签: html excel vba web pull


【解决方案1】:

我找到了解决办法,谢谢大家!这是正确的代码:

Sub PullDataFromWebsite()

    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As MSHTML.HTMLDocument
    Dim HTMLInput As MSHTML.IHTMLElementCollection
    Dim HTMLAs As MSHTML.IHTMLElementCollection
    Dim HTMLA As MSHTML.IHTMLElement
    Dim framesITML As MSHTML.HTMLDocument
    
    IE.Visible = True
    IE.navigate "https://nbs.rs/sr_RS/drugi-nivo-navigacije/servisi/jedinstveni-registar-racuna/"
    
    Do While IE.ReadyState <> READYSTATE_COMPLETE Or IE.Busy
    Loop
    
    Set HTMLDoc = IE.Document
    Set framesITML = HTMLDoc.frames(0).Document
    
    Set HTMLInput = framesITML.getElementsByName("matbr")
    HTMLInput(1).Value = "07364954"
    
    Set HTMLInput = framesITML.getElementsByName("Submit")
    HTMLInput(0).Click
    
    Do While IE.ReadyState <> READYSTATE_COMPLETE Or IE.Busy
    Loop
    
    Dim HTMLTable As MSHTML.IHTMLElement
    Dim HTMLRow As MSHTML.IHTMLElement
    Dim HTMLCell As MSHTML.IHTMLElement
    Dim RowNum As Long, ColNum As Integer
    Dim num_len As Integer

        RowNum = 2
        
        For Each HTMLRow In framesITML.getElementById("result").getElementsByTagName("tr")
            ColNum = 1
            
            For Each HTMLCell In HTMLRow.Children
                num_len = Len(HTMLCell.innerText)
                
                If num_len = 22 Then
                    Cells(RowNum, ColNum) = Trim(HTMLCell.innerText)
                End If
                
            Next HTMLCell
            
            RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
            
        Next HTMLRow
    
    Set HTMLInput = framesITML.getElementsByClassName("page-link")
    
    If HTMLInput.Length > 0 Then
    
        HTMLInput(0).Click
        
        Do While IE.ReadyState <> READYSTATE_COMPLETE Or IE.Busy
        Loop
        
        RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
        
        For Each HTMLRow In framesITML.getElementById("result").getElementsByTagName("tr")
            ColNum = 1
            
            For Each HTMLCell In HTMLRow.Children
                num_len = Len(HTMLCell.innerText)
                
                If num_len = 22 Then
                    Cells(RowNum, ColNum) = Trim(HTMLCell.innerText)
                End If
                
            Next HTMLCell
            
            RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
            
        Next HTMLRow
        
        Set HTMLInput = framesITML.getElementsByClassName("page-link")
        HTMLInput(0).Click
        
        Do While IE.ReadyState <> READYSTATE_COMPLETE Or IE.Busy
        Loop
        
        RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
        
        For Each HTMLRow In framesITML.getElementById("result").getElementsByTagName("tr")
            ColNum = 1
            
            For Each HTMLCell In HTMLRow.Children
                num_len = Len(HTMLCell.innerText)
                
                If num_len = 22 Then
                    Cells(RowNum, ColNum) = Trim(HTMLCell.innerText)
                End If
                
            Next HTMLCell
            
            RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
            
        Next HTMLRow
        
    End If
    
    Range("A2").CurrentRegion.RemoveDuplicates Columns:=1, Header:=xlNo
    
    IE.Quit
        
End Sub

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多