【发布时间】:2018-04-23 09:35:26
【问题描述】:
有一个程序,解析网站。程序运行良好,但时间过长。我想简化/加快它。请告诉我,也许有关于这个问题的任何专门网站?如有任何帮助,我将不胜感激。
程序如何运作:
首先,通过超链接,程序进入该站点,在该站点中找到某个元素表
然后把每个元素的“href”取出来,变成超链接,插入到Excel的第一张表中
- 然后它提取每个元素的文本并将其插入到 Excel 的第二个表格中
-
然后遍历第 1 和第 2 表的元素,因此在第 3 表中每个元素都包含“超链接 + 文本”
Sub Softгиперссылки() Application.DisplayAlerts = False Call mainмассивы Application.DisplayAlerts = True End Sub Sub mainмассивы() Dim r As Range Dim firstAddress As String Dim iLoop As Long Dim book1 As Workbook Dim sheetNames(1 To 19) As String Dim Ssilka As String sheetNames(1) = "Лист1" sheetNames(2) = "Лист2" sheetNames(3) = "Лист3" sheetNames(4) = "Лист4" sheetNames(5) = "Лист5" sheetNames(6) = "Лист6" sheetNames(7) = "Лист7" sheetNames(8) = "Лист8" sheetNames(9) = "Лист9" sheetNames(10) = "Лист10" sheetNames(11) = "Лист11" sheetNames(12) = "Лист12" sheetNames(13) = "Лист13" sheetNames(14) = "Лист14" sheetNames(15) = "Лист15" sheetNames(16) = "Лист16" sheetNames(17) = "Лист17" sheetNames(18) = "Лист18" sheetNames(19) = "Лист19" 'пропускаем ошибку Set book1 = Workbooks.Open("E:\Super M\Проект ставки\Поиск решения\Усов 7\Условия для андердогов\6.xlsm") iLoop = -1 With book1.Worksheets("Лист1").Range("R34:R99") For Each r In .Rows If r.Value = 1 Then iLoop = iLoop + 1 Ssilka = r.Offset(, -13).Hyperlinks.Item(1).Address .Parent.Parent.Worksheets(sheetNames(1)).Activate .Parent.Parent.Save extractTable Ssilka, book1, iLoop End If Next r End With book1.Save book1.Close Exit Sub End Sub Function extractTable(Ssilka As String, book1 As Workbook, iLoop As Long) Dim oDom As Object, oTable As Object, oRow As Object Dim iRows As Integer, iCols As Integer Dim x As Integer, y As Integer Dim data() Dim oHttp As Object Dim oRegEx As Object Dim sResponse As String Dim oRange As Range Dim Perem1 As String Dim Perem2 As String 'для гиперссылки ' get page Set oHttp = CreateObject("MSXML2.XMLHTTP") oHttp.Open "GET", Ssilka, False oHttp.Send ' cleanup response sResponse = StrConv(oHttp.responseBody, vbUnicode) Set oHttp = Nothing sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE ")) Set oRegEx = CreateObject("vbscript.regexp") With oRegEx .MultiLine = True .Global = True .IgnoreCase = False .Pattern = "<(script|SCRIPT)[\w\W]+?</\1>" sResponse = .Replace(sResponse, "") End With Set oRegEx = Nothing ' create Document from response Set oDom = CreateObject("htmlFile") oDom.Write sResponse DoEvents ' table with results, indexes starts with zero Set oTable = oDom.getelementsbytagname("table")(3) DoEvents iRows = oTable.Rows.Length iCols = oTable.Rows(1).Cells.Length ' first row and first column contain no intresting data ReDim data(1 To iRows - 1, 1 To iCols - 1) ' fill in data array For x = 1 To iRows - 1 Set oRow = oTable.Rows(x) For y = 1 To iCols - 1 If oRow.Cells(y).Children.Length > 0 Then data(x, y) = oRow.Cells(y).getelementsbytagname("a")(0).getattribute("href") End If Next y Next x Set oRow = Nothing Set oTable = Nothing Set oDom = Nothing ' put data array on worksheet Set oRange = book1.ActiveSheet.Cells(110, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1) oRange.NumberFormat = "@" oRange.Value = data oRange.Replace What:="about:", Replacement:="http://allscores.ru/soccer/" Set oRange = Nothing '!!!! для текста ' get page Set oHttp = CreateObject("MSXML2.XMLHTTP") oHttp.Open "GET", Ssilka, False oHttp.Send ' cleanup response sResponse = StrConv(oHttp.responseBody, vbUnicode) Set oHttp = Nothing sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE ")) Set oRegEx = CreateObject("vbscript.regexp") With oRegEx .MultiLine = True .Global = True .IgnoreCase = False .Pattern = "<(script|SCRIPT)[\w\W]+?</\1>" sResponse = .Replace(sResponse, "") End With Set oRegEx = Nothing ' create Document from response Set oDom = CreateObject("htmlFile") oDom.Write sResponse DoEvents ' table with results, indexes starts with zero Set oTable = oDom.getelementsbytagname("table")(3) DoEvents iRows = oTable.Rows.Length iCols = oTable.Rows(1).Cells.Length ' first row and first column contain no intresting data ReDim data(1 To iRows - 1, 1 To iCols - 1) ' fill in data array For x = 1 To iRows - 1 Set oRow = oTable.Rows(x) For y = 1 To iCols - 1 If oRow.Cells(y).Children.Length > 0 Then data(x, y) = oRow.Cells(y).innerText End If Next y Next x Set oRow = Nothing Set oTable = Nothing Set oDom = Nothing ' put data array on worksheet Set oRange = book1.ActiveSheet.Cells(185, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1) oRange.NumberFormat = "@" oRange.Value = data Set oRange = Nothing '!!!!! цикл для текст+гиперссылка For A = 0 To 4 For B = 0 To 65 Perem1 = book1.ActiveSheet.Cells(110 + B, (26 + (iLoop * 21)) + A).Value Perem2 = book1.ActiveSheet.Cells(185 + B, (26 + (iLoop * 21)) + A).Value book1.ActiveSheet.Hyperlinks.Add Anchor:=Cells(34 + B, (26 + (iLoop * 21)) + A), Address:=Perem1, TextToDisplay:=Perem2 Next Next End Function
【问题讨论】:
-
如果您的工作代码只需要改进,那么您可能在这篇文章的错误位置。 Code Review 是他们处理现有/工作代码的地方,并在速度、安全性、可持续性和寿命方面尽最大努力改进它,包括最佳实践。试一试。他们很好!
-
@Ralph,谢谢!
标签: vba excel parsing href innertext