【问题标题】:VBA, Advanced Filter & Remove DuplicatesVBA,高级过滤器和删除重复项
【发布时间】:2016-09-16 18:39:32
【问题描述】:

我有一个列表,其中包含 col A 中的不同路径。 我在 B 和 C 中有一个详细信息列表。

我如何在新工作表上:1) 提取每个唯一路径,2) 为每个路径编译 B * C 中的值并删除重复项。 3)在最后一行完成后重复下一条路径。

我确实有一个错误的宏,但为了简洁准确,我不会发布。除非有人想读,否则请提出要求

任何帮助将不胜感激。

这是我所拥有的(我知道它很长,我会尝试清理一下):

Sub FileDetail()
'Does not fill down, go to bottom to unleased fill down
'Skips unreadable files
'This Macro retrieves data from files picked. The data is based on header. Data is also filtered for unique values.
'You must make sure headers are in the first row and delimted.


Dim wb As Workbook, fileNames As Object, errCheck As Boolean
    Dim ws As Worksheet, wks As Worksheet, wksSummary As Worksheet
    Dim y As Range, intRow As Long, i As Integer

Dim r As Range, lr As Long, myrg As Range, z As Range
    Dim boolWritten As Boolean, lngNextRow As Long
    Dim intColNode As Integer, intColScenario As Integer
    Dim intColNext As Integer, lngStartRow As Long
    Dim lngLastNode As Long, lngLastScen As Long
    Dim intColinstrument As Integer, lngLastinstrument As Long



   'Skipped worksheet for file names
   Dim wksSkipped As Worksheet
   Set wksSkipped = ThisWorkbook.Worksheets("Skipped")


     ' Turn off screen updating and automatic calculation
    With Application
        .ScreenUpdating = False
        .Calculation = xlCalculationManual
    End With

     ' Create a new worksheet, if required
    On Error Resume Next
    Set wksSummary = ActiveWorkbook.Worksheets("Unique data")
    On Error GoTo 0
    If wksSummary Is Nothing Then
        Set wksSummary = ActiveWorkbook.Worksheets.Add(After:=ActiveWorkbook.Worksheets(ActiveWorkbook.Worksheets.Count))
        wksSummary.Name = "Unique data"
    End If

     ' Set the initial output range, and assign column headers
    With wksSummary
        Set y = .Cells(.Rows.Count, 3).End(xlUp).Offset(1, 0)
        Set r = y.Offset(0, 1)
        Set z = y.Offset(0, -2)
        lngStartRow = y.Row
        .Range("A1:E1").Value = Array("File Name", "Sheet Name", "Node", "Book", "Instrument")
    End With

'get user input for files to search
Set fileNames = CreateObject("Scripting.Dictionary")
errCheck = UserInput.FileDialogDictionary(fileNames)
If errCheck Then
   Exit Sub
End If
'''
For Each Key In fileNames 'loop through the dictionary




On Error Resume Next
Set wb = Workbooks.Open(fileNames(Key))
If Err.Number <> 0 Then
    Set wb = Nothing    ' or set a boolean error flag
End If
On Error GoTo 0    ' or your custom error handler

If wb Is Nothing Then
wksSkipped.Cells(wksSkipped.Cells(wksSkipped.Rows.Count, "A").End(xlUp).Row + 1, 1) = fileNames(Key)

Else
    Debug.Print "Successfully loaded " & fileNames(Key)
    wb.Application.Visible = False 'make it not visible
    ' more working with wb




 ' Check each sheet in turn
    For Each ws In ActiveWorkbook.Worksheets
        With ws
             ' Only action the sheet if it's not the 'Unique data' sheet
            If .Name <> wksSummary.Name Then
                boolWritten = False



       ''''''''''''''''''testing additional column..trouble here



                                 ' Find the Anchor Date
                intColScenario = 0
                On Error Resume Next
                intColScenario = WorksheetFunction.Match("instrument.instrumentType", .Rows(1), 0)
                On Error GoTo 0

                If intColScenario > 0 Then
                     ' Only action if there is data in column E
                    If Application.WorksheetFunction.CountA(.Columns(intColScenario)) > 1 Then
                       lr = .Cells(.Rows.Count, intColScenario).End(xlUp).Row


                         ' Copy unique values from the formula column to the 'Unique data' sheet, and write sheet & file details
                        .Range(.Cells(1, intColScenario), .Cells(lr, intColScenario)).AdvancedFilter xlFilterCopy, , r, True
                        r.Offset(0, -2).Value = ws.Name
                        r.Offset(0, -3).Value = ws.Parent.Name



                         ' Delete the column header copied to the list
                        r.Delete Shift:=xlUp
                        boolWritten = True
                    End If
                End If

          ''''''''''''''''''''''''''''''''''''below is working'''''''''''''''''''''''

                 ' Find the Desk column
                intColNode = 0
                On Error Resume Next
                intColNode = WorksheetFunction.Match("book.reportingLine.pathName", .Rows(1), 0)
                On Error GoTo 0

                If intColNode > 0 Then
                     ' Only action if there is data in column A
                    If Application.WorksheetFunction.CountA(.Columns(intColNode)) > 1 Then
                        lr = .Cells(.Rows.Count, intColNode).End(xlUp).Row

                         ' Copy unique values from column A to the 'Unique data' sheet, and write sheet & file details (if not already written)
                        .Range(.Cells(1, intColNode), .Cells(lr, intColNode)).AdvancedFilter xlFilterCopy, , y, True
                        If Not boolWritten Then
                            y.Offset(0, -1).Value = ws.Name
                            y.Offset(0, -2).Value = ws.Parent.Name
                        End If

                         ' Delete the column header copied to the list
                        y.Delete Shift:=xlUp
                    End If
                End If

          ' Find the Intrument
                intColinstrument = 0
                On Error Resume Next
                intColinstrument = WorksheetFunction.Match("instrument.instrumentType", .Rows(1), 0)
                On Error GoTo 0

                If intColinstrument > 0 Then
                     ' Only action if there is data in column A
                    If Application.WorksheetFunction.CountA(.Columns(intColinstrument)) > 1 Then
                        lr = .Cells(.Rows.Count, intColinstrument).End(xlUp).Row

                         ' Copy unique values from column A to the 'Unique data' sheet, and write sheet & file details (if not already written)
                        .Range(.Cells(1, intColinstrument), .Cells(lr, intColinstrument)).AdvancedFilter xlFilterCopy, , z, True
                        If Not boolWritten Then
                            z.Offset(0, -3).Value = ws.Name
                            z.Offset(0, -4).Value = ws.Parent.Name
                        End If

                         ' Delete the column header copied to the list
                        z.Delete Shift:=xlUp
                    End If
                End If




         ' Identify the next row, based on the most rows used in columns C & D
                lngLastNode = wksSummary.Cells(wksSummary.Rows.Count, 3).End(xlUp).Row
                lngLastScen = wksSummary.Cells(wksSummary.Rows.Count, 4).End(xlUp).Row
                lngLastinstrument = wksSummary.Cells(wksSummary.Rows.Count, 5).End(xlUp).Row
                lngNextRow = WorksheetFunction.Max(lngLastNode, lngLastScen) + 1

                If (lngNextRow - lngStartRow) > 1 Then

                   ' Fill down the workbook and sheet names
                    z.Resize(lngNextRow - lngStartRow, 2).FillDown


                    ''''''''Optional if you want headers to be filled down.

                    'If (lngNextRow - lngLastNode) > 1 Then


                         ' Fill down the last Node value
                        'wksSummary.Range(wksSummary.Cells(lngLastNode, 3), wksSummary.Cells(lngNextRow - 1, 3)).FillDown
                    'End If
                    'If (lngNextRow - lngLastScen) > 1 Then
                         ' Fill down the last Scenario value
                        'wksSummary.Range(wksSummary.Cells(lngLastScen, 4), wksSummary.Cells(lngNextRow - 1, 4)).FillDown
                    'End If


                End If



                Set y = wksSummary.Cells(lngNextRow, 3)
                Set r = y.Offset(0, 1)
                Set z = y.Offset(0, -2)
                lngStartRow = y.Row
            End If
        End With
    Next ws
wb.Close savechanges:=False 'close the workbook do not save
Set wb = Nothing 'release the object
End If

Next 'End of the fileNames loop
Set fileNames = Nothing

 ' Autofit column widths of the report
wksSummary.Range("A1:E1").EntireColumn.AutoFit

' Reset system settings
With Application
   .Calculation = xlCalculationAutomatic
   .ScreenUpdating = True
   .Visible = True
End With
End Sub

所以这段代码获取了我指定的文件名、工作表名和列数据。

1) 但是,我无法为此添加额外的列。 (我目前得到 2 个提取的列),还有

2) 我无法将其设置为列相互依赖的格式。 ex 它会给我每条路径的独特价值,但不是每项运动的独特价值。

编辑以包含数据(我也想包含第 4 列和第 5 列,但为简单起见将其保留为 3):

+-------------------------------+------------+--------------+
| path                          | sport      | Teams        |
+-------------------------------+------------+--------------+
| stack/over/flow/larrybird     | basketball | celtics      |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | basketball | bulls        |
+-------------------------------+------------+--------------+
| stack/over/flow/tigerwoods    | golf       | pga          |
+-------------------------------+------------+--------------+
| stack/over/flow/josebautista  | baseball   | bluejays     |
+-------------------------------+------------+--------------+
| stack/over/flow/jordanspeith  | golf       | pga          |
+-------------------------------+------------+--------------+
| stack/over/flow/kevinlove     | basketball | timberwolves |
+-------------------------------+------------+--------------+
| stack/over/flow/lebronjames   | basketball | cavs         |
+-------------------------------+------------+--------------+
| stack/over/flow/stephencurry  | basketball | warriors     |
+-------------------------------+------------+--------------+
| stack/over/flow/larrybird     | baseball   | redsox       |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | baseball   | whitesox     |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | chess      | knight       |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | basketball | hornets      |
+-------------------------------+------------+--------------+
| stack/over/flow/kevinlove     | basketball | cavs         |
+-------------------------------+------------+--------------+
| stack/over/flow/tigerwoods    | golf       | pga          |
+-------------------------------+------------+--------------+

以及预期的结果(我在此填写)

+-------------------------------+------------+--------------+
| path                          | sport      | teams        |
+-------------------------------+------------+--------------+
| stack/over/flow/larrybird     | basketball | celtics      |
+-------------------------------+------------+--------------+
|                               | baseball   | red sox      |
+-------------------------------+------------+--------------+
| stack/over/flow/tigerwoods    | golf       | pga          |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | basketball | bulls        |
+-------------------------------+------------+--------------+
|                               |            | hornets      |
+-------------------------------+------------+--------------+
|                               | baseball   | whitesox     |
+-------------------------------+------------+--------------+
|                               | chess      | knight       |
+-------------------------------+------------+--------------+
| stack/over/flow/kevinlove     | basketball | timberwolves |
+-------------------------------+------------+--------------+
|                               |            | cavs         |
+-------------------------------+------------+--------------+
| stack/over/flow/josebautista  | baseball   | bluejays     |
+-------------------------------+------------+--------------+

第 3 列(第 4 列和第 5 列)获得唯一值似乎是个问题。

【问题讨论】:

  • 为了获得答案(以及任何帮助),您应该始终展示您迄今为止所做的尝试......它也确实帮助我们帮助您:)
  • @DirkReichel,现在发布。请注意它的长大声笑。 :(
  • 没关系...(有总比没有好);)

标签: vba excel


【解决方案1】:

最简单的方法是,复制整个范围,对其进行排序,然后运行一些计算:

Sub Macro1()
  Application.ScreenUpdating = False
  Dim str As String
  With Sheet1
      str = .Range(.Cells(.Rows.Count, 1).End(xlUp), .Cells(1, 3)).Address
      .Range(str).Copy Sheet2.Cells(1, 1)
  End With
  Application.CutCopyMode = False
  With Sheet2
    .Activate
    Dim str2 As String
    str2 = .Range(str).Offset(1).Resize(.Range(str).Rows.Count - 1).Address
    .Range(str2).Value = Evaluate("if(" & str2 & "="""",-1E+99," & str2 & ")")
    .Sort.SortFields.Clear
    .Sort.SortFields.Add .Range(str).Offset(1).Resize(, 1), 0, 1, , 0
    .Sort.SortFields.Add .Range(str).Offset(1, 1).Resize(, 1), 0, 1, , 0
    .Sort.SortFields.Add .Range(str).Offset(1, 2).Resize(, 1), 0, 1, , 0
    .Sort.SetRange .Range(str).Offset(1)
    .Sort.Header = 2
    .Sort.Apply
    .Range(str2).Value = Evaluate("if(" & str2 & "=-1E+99,""""," & str2 & ")")
    Dim val As Variant, i As Long, rng2 As Range
    val = .Range(str).Value
    Set rng2 = .Range(str).Offset(.Range(str).Rows.Count).Resize(1)
    For i = 3 To UBound(val)
      If val(i - 1, 1) = val(i, 1) And val(i - 1, 2) = val(i, 2) And val(i - 1, 3) = val(i, 3) Then Set rng2 = Union(rng2, .Range(str).Rows(i))
    Next
    i = .Range(str).Rows.Count - rng2.Rows.Count
    rng2.EntireRow.Delete xlShiftUp
    With .Range(str).Offset(1).Resize(i - 1, 1)
      .Value = Evaluate("if(" & .Address & "=" & .Offset(-1).Address & ",""""," & .Address & ")")
      With .Offset(, 1)
        .Value = Evaluate("if((" & .Address & "=" & .Offset(-1).Address & ")*(" & .Offset(, -1).Address & "=""""),""""," & .Address & ")")
      End With
    End With
  End With
End Sub

通过电话完成,可能有错误!
现在改动很大,请复制整个代码并重新测试。

编辑

好的,一个完全不同的解决方案。应该很快,但它的工作方式可能不是很清楚:P

Sub Macro2()

  Dim inVal As Variant, outVal() As Variant, orderArr() As Variant
  Dim startRng As Range
  Dim i As Long, j As Long, k As Long, iCount As Long

  Set startRng = Sheet1.Range("A2:C2") 'upmost row-range of the range to be copied (exclude headers!)
  With startRng.Parent
    inVal = .Range(startRng, .Cells(.Rows.Count, startRng.Column).End(xlUp)).Value
  End With
  ReDim orderArr(1 To UBound(inVal))
  For i = 1 To UBound(inVal)
    iCount = 1
    For j = 1 To UBound(inVal)
      For k = 1 To UBound(inVal, 2)
        If StrComp(inVal(i, k), inVal(j, k), 1) = 1 Then iCount = iCount + 1
        If StrComp(inVal(i, k), inVal(j, k), 1) <> 0 Then Exit For
      Next
    Next
    orderArr(i) = iCount
  Next
  k = 1
  ReDim outVal(1 To UBound(inVal, 2), 1 To UBound(inVal))
  For i = 0 To Application.Max(orderArr)
    If IsNumeric(Application.Match(i, orderArr, 0)) Then
      iCount = Application.Match(i, orderArr, 0)
      For j = 1 To UBound(inVal, 2)
        outVal(j, k) = inVal(iCount, j)
      Next
      k = k + 1
    End If
  Next
  ReDim Preserve outVal(1 To UBound(inVal, 2), 1 To k - 1)
  For i = 1 To UBound(outVal)
    For j = UBound(outVal, 2) To 2 Step -1
      If outVal(i, j - 1) = outVal(i, j) Then
        If i = 1 Then
          outVal(i, j) = ""
        ElseIf outVal(i - 1, j) = "" Then
          outVal(i, j) = ""
        End If
      End If
    Next
  Next
  'upper left cell of the output-range
  Sheet2.Range("A2").Resize(UBound(outVal, 2), UBound(outVal)).Value = Application.Transpose(outVal)
End Sub

随意将起始范围 (Sheet1.Range("A2:C2")) 设置为 Selection,然后只需选择范围并启动宏。适用于任何尺寸(虽然非常大的范围可能会冻结 excel 一段时间)。

和往常一样:如果您有任何问题,尽管问:)

【讨论】:

  • 嗨,Dirk,感谢您的回复,它真的很好。如果我想要第三列中的唯一值怎么办?它似乎重复了这些值。这也没有给出第三列的所有值。
  • 还有一个不想要的行为:如果 {a,b,c} 后面跟着 {a,b, } 那么第二行将是空的,也不会被删除(无法避免排序的工作方式)。您还需要检查这些行并删除它们。
  • 出现错误,Set rng2 = Union(rng2, Rng.Rows(i)) 需要对象。
  • 谢谢 Dirk,我在 Rng.EntireRow.Delete xlShiftUp 上遇到了一个错误,要重启我的笔记本电脑很慢。
  • 也改变了...我不应该总是通过“大脑”尝试做这样的事情:P
【解决方案2】:

一种有效的解决方案是:

  • 首先复制带有Range.Copy 的值
  • 然后用Range.Sort对行进行排序
  • 然后用Range.RemoveDuplicates删除重复的行
  • 最后用循环删除重复的分支

此过程删除重复的行并将格式设置为树视图:

Sub RemoveDuplicates()
    Dim rgSource As Range, rgTarget As Range, data(), r&, c&

    ' define the source, the target and the number of columns
    Const columnCount = 3
    Set rgSource = Range("Sheet1!A3")
    Set rgTarget = Range("Sheet1!F3")

    ' copy the values to the targeted range
    Set rgSource = rgSource.Resize(rgSource.End(xlDown).Row - rgSource.Row + 1, columnCount)
    Set rgTarget = rgTarget.Resize(rgSource.Rows.Count, columnCount)
    rgSource.Copy rgTarget

    ' sort the rows on each column
    For c = columnCount To 1 Step -1
      rgTarget.Sort rgTarget.Columns(c)
    Next

    ' build the array of columns for RemoveDuplicates
    Dim rdColumns(0 To columnCount - 1)
    For c = 1 To columnCount: rdColumns(c - 1) = c: Next

    ' remove the duplicated rows
    rgTarget.RemoveDuplicates rdColumns
    Set rgTarget = rgTarget.Resize(rgTarget.End(xlDown).Row - rgTarget.Row + 1, columnCount)

    ' format as a tree view by removing the duplicated branches
    data = rgTarget.Value
    For r = UBound(data) To 2 Step -1
      For c = 1 To columnCount - 1
        If data(r, c) <> data(r - 1, c) Then Exit For
        data(r, c) = Empty
      Next
    Next
    rgTarget.Value = data

End Sub

【讨论】:

  • 我喜欢这个解决方案。使用RemoveDuplicates VBA.Array(1, 2, 3) 避免option base 1 可能出现的问题。
  • 感谢您的回复!这很有效,但是我得到了所有 3 列的许多重复项。有没有办法只做独特的道路、独特的团队和独特的运动。我得到了很多重复。
  • @Jonnyboi,我修复了程序(只需向后迭代而不是向前迭代以删除重复的分支)。我用您的示例对其进行了测试,得到了相同的结果。
  • 很棒的弗洛伦特。我在最后一列有重复,我错过了什么吗? col B 中的唯一值很好,C 不知道为什么。
  • 我粘贴了数据。 col C 中的值是否刚刚填满?只要填写其他值,我不介意此视图(您当前的代码;对于数据透视表,我可以看到这很有用)。但也不要介意我们正在讨论的观点,它只显示了独特的价值。
【解决方案3】:

如果您不介意对结果进行排序,而不是按原始顺序排序,以下代码将执行此操作。它应该“自动适应”任意数量的列。

(如果您需要原始顺序的结果,我会使用集合或字典和用户定义对象的方法)

您的数据应以 A1 开头(第 1 行是列标签),您可以看到在代码中为源数据和结果数据定义工作表的位置。

由于大部分“工作”是在 VBA 数组中完成的,而不是在工作表上,所以它应该运行得非常快。

Option Explicit
Sub SortFormat()
    Dim wsSrc As Worksheet, wsRes As Worksheet, rRes As Range
    Dim vRes As Variant
    Dim R As Range, C As Range
    Dim V As Variant
    Dim I As Long, J As Long

'Set source and results worksheets, ranges
Set wsSrc = Worksheets("Sheet1")
Set wsRes = Worksheets("Sheet2")
    wsRes.Cells.Clear
    Set rRes = wsRes.Cells(1, 1)

Application.ScreenUpdating = False

'Copy source data to results worksheet
Dim LastRow As Long, LastCol As Long
With wsSrc
    LastRow = .Cells(.Rows.Count, 1).End(xlUp).Row
    LastCol = .Cells(1, .Columns.Count).End(xlToLeft).Column
    Set R = .Range(.Cells(1, 1), .Cells(LastRow, LastCol))
    R.Copy rRes
    Application.CutCopyMode = False
End With

'Go to Results sheet
With wsRes
    .Select
    .UsedRange.EntireColumn.AutoFit
End With
rRes.Select

'Sort the data
With wsRes.Sort.SortFields
    .Clear
    Set R = wsRes.UsedRange.Columns
    For Each C In R
        .Add Key:=C, SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal
    Next C
End With

With wsRes.Sort
    .SetRange R
    .Header = xlYes
    .MatchCase = False
    .Orientation = xlTopToBottom
    .Apply
End With

'Remove any completely duplicated rows
'Create array of columns
ReDim V(0 To R.Columns.Count - 1)
For I = 0 To UBound(V)
    V(I) = I + 1
Next I

R.RemoveDuplicates Columns:=(V), Header:=xlYes

'Remove Duplicated items in each row
'Work in VBA array for more speed
vRes = R

For I = UBound(vRes, 1) To 3 Step -1
    If vRes(I, 1) = vRes(I - 1, 1) Then vRes(I, 1) = ""
    For J = 2 To UBound(vRes, 2)
        If vRes(I, J) = vRes(I - 1, J) And _
            vRes(I, J - 1) = "" Then vRes(I, J) = ""
    Next J
Next I

R = vRes

Application.ScreenUpdating = True

End Sub

【讨论】:

    【解决方案4】:

    如果您想创建一个唯一的列表,请使用Dictionary object

    确保添加对脚本运行时控件的引用!只是一些基于您的示例数据的快速而肮脏的代码(如完全未经测试):

    Sub GetUniques()
        Dim oDic as New Dictionary
        Dim r as Integer
        Dim strKey as String
        Dim varValue(2) as Variant
    
        'Get a unique list of Column A values
        r = 3 'Your data starts on row 3
        Do While Cells(r,1).value <> "" 'Run until the first blank line
            strKey = Cells(r,1).value 
            varValue(0) = Cells(r,2).Value
            varValue(1) = Cells(r,3).Value
            If Not oDic.Exists(strKey) Then 
                oDic.Add strKey, varValue
            End If
            r = r +1
        Loop
    
        'Now display your list of unique values
        Dim K as Variant
        Dim myArray as Variant
        r = 3 'We'll start on row 3 again but move over to column I (9)
        For Each K in oDic.Keys
            Cells(r,9).Value = K
            myArray = oDic.Item(K)
            Cells(r,10).Value = myArray(0)
            Cells(r,11).Value = myArray(1)
            r = r + 1
        Next K
    
    End Sub
    

    【讨论】:

    • oDic.Add strKey, varValue 键已与此集合的元素相关联
    • 抱歉,在上面的那行写了一个小错字。这个:If Not oDic.Exists(key) Then 应该是这个:If Not oDic.Exists(strKey) Then
    • 嗨,蒂姆,实际上效果很好。一个问题是第 3 列,它是否没有列出与该路径相关的所有值,它缺少一些。
    • 噢!这对我来说完全是一个愚蠢的错误。 :( 它仅检查 A 列的唯一性。因此,如果您有两行:{1, 2, 3} 和 {1, 2, 2},它不会识别它们是不同的,因为它只检查 A,而不是全部3. 快速解决方法是将所有 3 个单元格连接成一个字符串并将其用作字典的键,然后将数组增加到 3 个元素并存储 A、B 和 C 列。我将再次修改代码。
    • 呼!那也行不通。让我考虑一下。
    猜你喜欢
    • 1970-01-01
    • 2017-06-09
    • 1970-01-01
    • 1970-01-01
    • 2021-12-07
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2021-06-19
    相关资源
    最近更新 更多