【问题标题】:Extracting e-mail address from several websites从多个网站提取电子邮件地址
【发布时间】:2021-02-19 16:49:05
【问题描述】:

我有以下代码,它只从一个网站检索电子邮件地址。我希望它适用于多个网站。

它在网站的源代码中搜索 @ 字符并将其粘贴到工作表的某个范围内。

有什么方法可以从列表的所有网站中获取这些内容,并将它们放在工作表上的另一个下方?

Private Sub Email_Extractor_From_Website()
    Dim oWebData As Object, sPageHTML  As String, sWebURL As String
 
    'The code works fine for 1 website of the below, however i'd like it to work for several websites
    sWebURL = "http://www.example1.com/"
    sWebURL = "http://www.example2.com/"
    sWebURL = "http://www.example3.com./"
    'etc
    'Extract data from website to Excel using VBA
    Set oWebData = CreateObject("MSXML2.ServerXMLHTTP")
    oWebData.Open "GET", sWebURL, False
    oWebData.send
    sPageHTML = oWebData.responseText
 
    'Get webpage data into Excel
    Extract_Email_Address_From_Text sPageHTML
End Sub

Private Sub Extract_Email_Address_From_Text(Optional Text_Content As String)
    Dlim_List = " ""(),:;<>@[\]"
    
    'Get Text Content and assign to a Variable
    If Text_Content = "" Then
        Text_Content = ThisWorkbook.Sheets(1).Cells(2, 1)
    End If
    Web_Page_Text1 = Text_Content
    If Web_Page_Text1 = "" Then
        MsgBox "Error: No Input Provided - Provide Input"
        Exit Sub
    End If
    
    'Scan each word in Text and Extract Email Addresses
    ORow = 2
    While (Web_Page_Text1 <> "")
    
        'Locate position of symbol "@"
        First_@ = VBA.InStr(1, Web_Page_Text1, "@", vbTextCompare)
        
        'If there is no occurance of "@" then terminate process
        If First_@ = 0 Then GoTo End_sub:
        
        'Seperate
        Web_Page_Text2 = VBA.Mid(Web_Page_Text1, 1, First_@ - 1)
        Web_Page_Text3 = VBA.Mid(Web_Page_Text1, First_@ + 1)
        Dlim_Pos_Max = 99999
        Dlim_Pos_Min = 0
        
        For i = 1 To VBA.Len(Dlim_List)
            Dlim_2_Compare = VBA.Mid(Dlim_List, i, 1)
                        
            Dlim_Pos = VBA.InStrRev(Web_Page_Text2, Dlim_2_Compare, -1, vbTextCompare)
            If (Dlim_Pos > Dlim_Pos_Min) And (Dlim_Pos > 0) Then Dlim_Pos_Min = Dlim_Pos
    
            Dlim_Pos = VBA.InStr(1, Web_Page_Text3, Dlim_2_Compare, vbTextCompare)
            If (Dlim_Pos < Dlim_Pos_Max) And (Dlim_Pos > 0) Then Dlim_Pos_Max = Dlim_Pos
        Next i
        If Dlim_Pos_Max = 0 Then GoTo End_sub:
        
        'get Email list to Text Variable
        Email_Domain_Part = VBA.Mid(Web_Page_Text3, 1, Dlim_Pos_Max - 1)
        Email_Local_Part = VBA.Mid(Web_Page_Text2, Dlim_Pos_Min + 1, VBA.Len(Web_Page_Text2) - Dlim_Pos_Min)
        Mail_Address = Email_Local_Part & "@" & Email_Domain_Part
        
        'Scan through remaining content
        ORow = ORow + 1
        ThisWorkbook.Sheets(1).Cells(ORow, 2).Select
        ThisWorkbook.Sheets(1).Cells(ORow, 2) = Mail_Address
        Web_Page_Text1 = VBA.Mid(Web_Page_Text1, Dlim_Pos_Max + First_@ + 1)
    Wend
End_sub:
    MsgBox " Process Completed"
End Sub

【问题讨论】:

  • 我试过你的代码,它对一个 url 工作正常。对于不止一个,您需要编写另一个函数,它将 url 和列号作为参数。你只是说它不起作用,但你有什么错误或请详细说明。

标签: excel vba web-scraping


【解决方案1】:

尝试下面的代码并稍作修改。如果可行,则更改以下函数名称:

Sub Test()
 Email_Extractor_From_Website "www.yahoo.com", 2
 Email_Extractor_From_Website "www.yahoo.com", 3
End Sub

Private Sub Email_Extractor_From_Website(sWebURL As String, OCol As Integer)
Dim oWebData As Object, sPageHTML  As String

'The code works fine for 1 website of the below, however i'd like it to work for several websites
'etc
'Extract data from website to Excel using VBA
 Set oWebData = CreateObject("MSXML2.ServerXMLHTTP")
 oWebData.Open "GET", sWebURL, False
 oWebData.send
 sPageHTML = oWebData.responseText

'Get webpage data into Excel
 Extract_Email_Address_From_Text sPageHTML, OCol
End Sub


Private Sub Extract_Email_Address_From_Text(Text_Content As String, OCol As Integer)
Dlim_List = " ""(),:;<>@[\]"

'Get Text Content and assign to a Variable
If Text_Content = "" Then
   Text_Content = ThisWorkbook.Sheets(1).Cells(2, 1)
End If
Web_Page_Text1 = Text_Content
If Web_Page_Text1 = "" Then
   MsgBox "Error: No Input Provided - Provide Input"
  Exit Sub
End If

'Scan each word in Text and Extract Email Addresses
ORow = 2
While (Web_Page_Text1 <> "")

'Locate position of symbol "@"
First_@ = VBA.InStr(1, Web_Page_Text1, "@", vbTextCompare)

'If there is no occurance of "@" then terminate process
If First_@ = 0 Then GoTo End_sub:

'Seperate
Web_Page_Text2 = VBA.Mid(Web_Page_Text1, 1, First_@ - 1)
Web_Page_Text3 = VBA.Mid(Web_Page_Text1, First_@ + 1)
Dlim_Pos_Max = 99999
Dlim_Pos_Min = 0

For i = 1 To VBA.Len(Dlim_List)
    Dlim_2_Compare = VBA.Mid(Dlim_List, i, 1)

    Dlim_Pos = VBA.InStrRev(Web_Page_Text2, Dlim_2_Compare, -1, vbTextCompare)
    If (Dlim_Pos > Dlim_Pos_Min) And (Dlim_Pos > 0) Then Dlim_Pos_Min = Dlim_Pos

    Dlim_Pos = VBA.InStr(1, Web_Page_Text3, Dlim_2_Compare, vbTextCompare)
    If (Dlim_Pos < Dlim_Pos_Max) And (Dlim_Pos > 0) Then Dlim_Pos_Max = Dlim_Pos
Next i
If Dlim_Pos_Max = 0 Then GoTo End_sub:

'get Email list to Text Variable
Email_Domain_Part = VBA.Mid(Web_Page_Text3, 1, Dlim_Pos_Max - 1)
Email_Local_Part = VBA.Mid(Web_Page_Text2, Dlim_Pos_Min + 1, VBA.Len(Web_Page_Text2) - Dlim_Pos_Min)
Mail_Address = Email_Local_Part & "@" & Email_Domain_Part

'Scan through remaining content
ORow = ORow + 1
ThisWorkbook.Sheets(1).Cells(ORow, OCol).Select
ThisWorkbook.Sheets(1).Cells(ORow, OCol) = Mail_Address
Web_Page_Text1 = VBA.Mid(Web_Page_Text1, Dlim_Pos_Max + First_@ + 1)
Wend
End_sub:
MsgBox " Process Completed"

End Sub

【讨论】:

  • 太好了!它完美地工作。只是一个问题:是否可以取出消息框,使其在完成从一个网站检索邮件时不会“卡住”?谢谢!
  • 评论一下就好了。在 Msgbox 行的开头放一个单引号。
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2016-04-12
  • 1970-01-01
  • 2015-01-26
  • 2011-12-17
相关资源
最近更新 更多