【问题标题】:VBA to scrape data from website - error on empty dataVBA从网站上抓取数据 - 空数据错误
【发布时间】:2016-01-12 02:21:50
【问题描述】:

我正在创建一个宏来从网站上抓取数据。我遇到的问题是当最后一个完整页面被抓取并且 A 列没有数据时,但其他列有,我收到运行时 1004 错误。例如,如果要抓取的总页数为 6,并且 A 列没有关于第 5 页的最后一个条目的数据,则宏将抓取第 5 页的所有数据,但在尝试获取时会抛出运行时错误到第6页。第6页也有数据,但我在想,由于A列中没有数据,它只是决定给出运行时错误。对此有什么想法吗?另外,使用我所包含的代码,在下一个箭头消失之前有宏循环会更容易吗?如果是这样,我会怎么做?

'Macro to query Daily Activity Search for DFB Counties
'Run Monday to pull data from Friday

Sub queryActivityDailyMforFWorking()

Dim nextrow As Integer, i As Integer
Dim dates
dates = Date - 3

Application.ScreenUpdating = False
Application.DisplayStatusBar = True

Do While i <= 50
    Application.StatusBar = "Processing Page " & i
    nextrow = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).row + 1
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;https://www.myfloridalicense.com/delinquency_results.asp?SID=&page=" & i & "&county_1=16&county_1=21&county_1=23&county_1=32&county_1=36&county_1=41&county_1=46&county_1=53&county_1=54&county_1=57&county_1=60&county_1=66&status=R&send_date=" & dates & "&search_1.x=1", _
        Destination:=Range("A" & nextrow))

        '.Name = _
        "2015&search_1.x=40&search_1.y=11&date=on&county_1=AL&lic_num_del=&lic_num_rep=&status=NS&biz_name=&owner_name="
        .FieldNames = False
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlSpecifiedTables
        .WebFormatting = xlWebFormattingNone
        .WebTables = "10"
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False

    'autofit columns
    Columns("A:G").Select
    Selection.EntireColumn.AutoFit

   'check for filter, if not then turn on filter
   ActiveSheet.AutoFilterMode = False
    If Not ActiveSheet.AutoFilterMode Then
    ActiveSheet.Range("A:G").AutoFilter
    End If

i = i + 1
End With
Application.StatusBar = False

'Align text left
Cells.Select
With Selection
    .HorizontalAlignment = xlLeft
    .VerticalAlignment = xlBottom
    .WrapText = False
    .Orientation = 0
    .AddIndent = False
    .IndentLevel = 0
    .ShrinkToFit = False
    .ReadingOrder = xlContext
    .MergeCells = False
End With

Loop

End Sub

【问题讨论】:

  • 当我选择调试时,它显示错误即将发生.Refresh BackgroundQuery :=False
  • 是否设置为后台查询为真,但刷新为假?
  • 是的,我相信...
  • 有零页吗?您声明 i 但从不为其赋值,因此在第一个循环中 i0
  • @Jeeped 不,没有零页。我忘了将该部分包含在代码中。即使我声明 i 的值为 1,我仍然收到相同的错误消息

标签: vba excel web-scraping


【解决方案1】:

我无法复制您的错误,但我猜它与您的 nextrow 变量有关。如果页面上的数据以空单元格结尾,则下一页数据的 nextrow 值将设置在前一页的数据中。我认为当您添加另一个查询表然后尝试刷新数据时会导致一些问题,因为表会重叠。如果您知道每一行总是有数据的列,则可以通过获取其他列之一的底行来解决此问题。我做了一些更新,似乎对我来说效果很好:

  • 添加了错误处理
  • 检查 A 列和 B 列的底行数据
  • 添加了一些逻辑来检查是否返回完整页面,如果不返回则退出循环,这样您就不必继续解析空页面
  • 格式化连接字符串中的日期,因为我发现过去会导致问题
  • 添加了删除不想要的标题的选项
  • 将单元格格式移出循环,使其只执行一次

希望这会有所帮助。

Sub queryActivityDailyMforFWorking()
On Error GoTo Err_queryActivityDailyMforFWorking

Const RowsPerPage As Byte = 20
Const DeleteHeader As Boolean = True

Dim nextrow As Integer, maxrow As Integer, i As Integer
Dim dates As Date

dates = Date - 3

Application.ScreenUpdating = False
Application.DisplayStatusBar = True

nextrow = 1
For i = 1 To 50
    Application.StatusBar = "Processing Page " & i
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;https://www.myfloridalicense.com/delinquency_results.asp?SID=&page=" & i & "&county_1=16&county_1=21&county_1=23&county_1=32&county_1=36&county_1=41&county_1=46&county_1=53&county_1=54&county_1=57&county_1=60&county_1=66&status=R&send_date=" & Format(dates, "m/d/yyyy") & "&search_1.x=1", _
        Destination:=Range("A" & nextrow))
        '.Name = _
        "2015&search_1.x=40&search_1.y=11&date=on&county_1=AL&lic_num_del=&lic_num_rep=&status=NS&biz_name=&owner_name="
        .FieldNames = False
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlSpecifiedTables
        .WebFormatting = xlWebFormattingNone
        .WebTables = "10"
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With

    ' Delete the header as required
    If DeleteHeader And i > 1 And ActiveSheet.Cells(nextrow, 1).Value = "License" Then ActiveSheet.Cells(nextrow, 1).EntireRow.Delete

    ' Find the bottom row
    maxrow = Application.WorksheetFunction.Max(ActiveSheet.Cells(Rows.Count, 1).End(xlUp).Row, ActiveSheet.Cells(Rows.Count, 2).End(xlUp).Row)
    ' Stop scraping if a full page wasn't returned
    If (maxrow - nextrow) < (RowsPerPage - IIf(DeleteHeader, 1, 0)) Then
        Exit For
    ' Otherwise set the row for the next page of data
    Else
        nextrow = maxrow + 1
    End If
Next i

Application.StatusBar = "Formatting data"

'autofit columns
ActiveSheet.Columns.EntireColumn.AutoFit

'check for filter, if not then turn on filter
ActiveSheet.AutoFilterMode = False
If Not ActiveSheet.AutoFilterMode Then ActiveSheet.Range("A:G").AutoFilter

'Align text left
With ActiveSheet.Cells
    .HorizontalAlignment = xlLeft
    .VerticalAlignment = xlBottom
    .WrapText = False
    .Orientation = 0
    .AddIndent = False
    .IndentLevel = 0
    .ShrinkToFit = False
    .ReadingOrder = xlContext
    .MergeCells = False
End With

Exit_queryActivityDailyMforFWorking:
    Application.StatusBar = False
    Application.ScreenUpdating = True
    Exit Sub

Err_queryActivityDailyMforFWorking:
    MsgBox Err.Description, vbCritical + vbOKOnly, Err.Number & " - Web Scraping Error"
    Resume Exit_queryActivityDailyMforFWorking

End Sub

【讨论】:

  • 这项工作的方式与我对代码的预期工作方式相同。非常感谢您的帮助! :) 找到最下面一行是我最长时间试图弄清楚如何编码的事情。我认为这将是告诉代码在找到最后一行数据后停止运行的最简单方法,但无法弄清楚如何对其进行编码。你的回答肯定帮助了我一大堆。再次感谢您
猜你喜欢
  • 2015-01-19
  • 1970-01-01
  • 1970-01-01
  • 2021-01-23
  • 1970-01-01
  • 1970-01-01
  • 2013-03-14
  • 2013-05-21
相关资源
最近更新 更多