删除使用 union 以外的一系列行的更快方法

Faster method to delete a range of rows other that using union

我正在使用以下代码:
Delete the similar rows, keeping only one and combine cells values in the range "N", separated by vbLf
它有效,但范围很大(例如 3 万行),宏需要很长时间才能完成。
调试代码后,我发现使用 union 会导致宏需要很长时间才能完成。

Set rngDel = Union(rngDel, ws.Range("A" & i + m))

那么对于下面的代码,如何采用一种更快的方法来删除除使用 union 之外的行范围?
预先感谢任何有用的评论和答案。

Sub DeleteSimilarRows_combine_Last_Column_N()
 
    Dim LastRow As Long, ws As Worksheet, arrWork, rngDel As Range, i As Long, j As Long, k As Long
     Dim strVal As String, m As Long
 
      Set ws = ActiveSheet: LastRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
    arrWork = ws.Range("A1:A" & LastRow).Value2 'Place the range in an array to make iteration faster
 
    Application.DisplayAlerts = False: Application.ScreenUpdating = False
    For i = 2 To UBound(arrWork) - 1                'Iterate between the array elements:
        If arrWork(i, 1) = arrWork(i + 1, 1) Then
            'Determine how many consecutive similar rows exist:______
            For k = 1 To LastRow
                If i + k + 1 >= UBound(arrWork) Then Exit For
                If arrWork(i, 1) <> arrWork(i + k + 1, 1) Then Exit For
            Next k '__
 
            For j = 14 To 14                  'Build the concatenated string of cells in range "N":
                strVal = ws.Cells(i, j).Value
                For m = 1 To k
                    strVal = strVal & vbLf & ws.Cells(i + m, j).Value
                Next m
                ws.Cells(i, j).Value = strVal: strVal = ""
           Next j
 
           For m = 1 To k                    'Place the cells for rows to be deleted in a Union range, to delete at the end, at once
                If rngDel Is Nothing Then
                     Set rngDel = ws.Range("A" & i + m)
                Else
                    Set rngDel = Union(rngDel, ws.Range("A" & i + m)) 'This line causes macro takes very long time to finish.
                End If
         Next m
         i = i + k: If i >= UBound(arrWork) - 1 Then Exit For 'Increment the i variable and exiting if the resulted value exits the array size
       End If
    Next i
 
    If Not rngDel Is Nothing Then rngDel.EntireRow.Delete    'Delete the not necessary rows
    Application.DisplayAlerts = True: Application.ScreenUpdating = True
End Sub

随着您向范围中添加更多 cells/areas,并集会逐渐变慢(请参阅此处的数字:)。如果您“自下而上”地工作,您可以每(例如)500 行删除 rngDel,但您不能采用这种方法,因为您正在工作 top-down.

这是一种不同的方法 - 将单元格添加到集合中,然后使用 batch-delete 过程在最后处理集合“bottom-up”。

Sub TestRowDeletion()

    Dim rngRows As Range, data, rngDel As Range, i As Long
    Dim t, nRows As Long, colCells As New Collection
    
    Set rngRows = Range("A1:A10000") '10k rows for testing
    
    'Approach #1 - your existing method
    DummyData rngRows     'populate some dummy data
    data = rngRows.Value
    t = Timer
    For i = 1 To UBound(data, 1)
        'removing ~25% of cells...
        If data(i, 1) > 0.75 Then BuildRange rngDel, rngRows.Cells(i)
    Next i
    If Not rngDel Is Nothing Then rngDel.EntireRow.Delete
    Debug.Print "Regular single delete", Timer - t

    'Approach #2 - batch-deleting rows
    DummyData rngRows 'reset data
    data = rngRows.Value
    t = Timer
    For i = 1 To UBound(data, 1)
        If data(i, 1) > 0.75 Then colCells.Add rngRows.Cells(i)
    Next i
    RemoveRows colCells
    Debug.Print "Batch-deleted", Timer - t

    'Approach #3 - array of "delete" flags plus SpecialCells()
    DummyData rngRows 'reset data
    data = rngRows.Value
    t = Timer
    ReDim flags(1 To UBound(data, 1), 1 To UBound(data, 2))
    For i = 1 To UBound(data, 1)
        If data(i, 1) > 0.75 Then
            flags(i, 1) = "x"
            bDelete = True 'flag we have rows to delete
        End If
    Next i
    If bDelete Then
        With rngRows.Offset(0, 10) 'use an empty column....
            .Value = flags  'populate with flags for deletion
            .SpecialCells(xlCellTypeConstants).EntireRow.Delete
        End With
    End If
    Debug.Print "Specialcells", Timer - t

End Sub

'Delete the row for any cell in `col`
'  cells were added to `col` in a "top down" order
Sub RemoveRows(col As Collection)
    Dim rngDel As Range, n As Long
    For n = col.Count To 1 Step -1 'working from the bottom up...
        BuildRange rngDel, col(n)
        If n Mod 250 = 0 Then
            rngDel.EntireRow.Delete
            Set rngDel = Nothing
        End If
    Next n
    If Not rngDel Is Nothing Then rngDel.EntireRow.Delete
End Sub

Sub DummyData(rng As Range)
    With rng
        .Formula = "=RAND()"
        .Value = .Value
    End With
End Sub

Sub BuildRange(ByRef rngTot As Range, rngAdd As Range)
    If rngTot Is Nothing Then
        Set rngTot = rngAdd
    Else
        Set rngTot = Application.Union(rngTot, rngAdd)
    End If
End Sub

时间(秒)- 请注意 single-delete 和 batch-delete 方法在添加更多行时有何不同。

# of rows deleted         ~2.5k/10k    ~5k/20k     ~7.5k/30k 
------------------------------------------------------------
1. Regular single delete     10.01         65.9       226
2. Batch-deleted             2.2           4.7        7.8
3. SpecialCells              1.6           3.1        4.7

您还可以考虑在数据集中填充“删除”标志,然后使用 autofilter/delete 可见行方法(编辑:添加为方法 #3)

将其发布为您实际用例的有效(但更快)版本,因为我的其他答案实际上只是关于安排不同方法的时间。

Sub DeleteSimilarRowsCombineColumnN()

    Const SEP As String = ","
    Dim arrKeys, arrVals, arrFlags, rngRows As Range, rngVals As Range, i As Long, key, currKey, s As String
    Dim ws As Worksheet, ub As Long, t, n As Long
    
    t = Timer
    Set ws = ActiveSheet
    Set ws = ActiveSheet
    Set rngRows = ws.Range("A1:A" & ws.Cells(Rows.Count, 1).End(xlUp).Row)
    Set rngVals = rngRows.EntireRow.Columns("N")
    
    arrKeys = rngRows.Value
    ub = UBound(arrKeys, 1)
    arrVals = rngVals.Value
    ReDim arrFlags(1 To UBound(arrKeys, 1), 1 To 1)
 
    currKey = Chr(0)     'non-existing key...
    For i = ub To 1 Step -1                      'looping from bottom up
        key = arrKeys(i, 1)                      'this row's key
        If key <> currKey Then                   'different key from row below?
            If i < ub Then arrVals(i + 1, 1) = s 'populate the collected info for any previous key
            s = arrVals(i, 1)                    'collect this row's "N" value
            currKey = key                        'set as current key
        Else
            If i < ub Then
                arrFlags(i + 1, 1) = "x" 'flag for deletion
                n = n + 1
            End If
            s = arrVals(i, 1) & SEP & s             'concatenate the "N" value
        End If
    Next i
    arrVals(1, 1) = s                              'populate the last (first) row...
    rngVals.Value = arrVals                        'drop the concatenated values
    
    If n > 0 Then    'any rows to delete?
        Debug.Print "About to delete " & n & " of " & ub & " rows", Timer - t
        With rngRows.Offset(0, 100) 'use any empty column
            .Value = arrFlags
            .SpecialCells(xlCellTypeConstants).EntireRow.Delete
        End With
        Debug.Print "Done deleting in " & Round(Timer - t, 2) & " sec"
    End If
End Sub