根据重复项的出现删除重复项
Removing duplicates based on their occurrence
我想检查特定列 (W) 是否有重复项(出现次数存储在另一列 (AZ) 中),然后以这种方式删除所有行:
- 值在列中找到两次 - 仅删除包含该值的一行。
- 在列中找到值的次数更多 - 删除所有包含该值的行。
我的代码工作得很好,但有时它没有删除所有重复项。有什么改进的想法吗?
编辑:更新后的代码工作得很好,除了它总是遗漏一个重复项并且不删除它。
fin = ws.UsedRange.Rows.count
For i = 2 To fin
ws.Range("AZ" & i).value = Application.WorksheetFunction.CountIf(ws.Range("W2:W" & fin), ws.Range("W" & i))
Next i
For j = fin To 2 Step -1
If ws.Range("AZ" & j).value > 2 Then
ws.Range("AZ" & j).EntireRow.Delete
fin = ws.UsedRange.Rows.count
ElseIf ws.Range("AZ" & j).value = 2 Then
Set rng = Range("W:W").Find(Range("W" & j).value, , xlValues, xlWhole, , xlNext)
rngRow = rng.Row
If rngRow <> j Then
ws.Range("AZ" & rngRow) = "1"
ws.Range("AZ" & j).EntireRow.Delete
fin = ws.UsedRange.Rows.count
Else
MsgBox "Error at row " & rngRow
End If
End If
Next j
无需在第二部分使用低效的第二个循环,只需使用像这样的实时计数
fin = ws.UsedRange.Rows.count
For i = 2 To fin
ws.Range("AZ" & i).value = Application.WorksheetFunction.CountIf(ws.Range("W2:W" & fin), ws.Range("W" & i))
Next i
For j = fin To 2 Step -1
If ws.Range("AZ" & j).value > 2 OR Application.WorksheetFunction.CountIf(ws.Range("W2:W" & fin), ws.Range("W" & j)) = 2 Then
ws.Range("AZ" & j).EntireRow.Delete
End If
Next j
如果速度是个问题,这里有一个应该更快的方法,因为它创建了一个要删除的行的集合,然后将它们删除。由于除实际的行删除之外的所有操作都在 VBA 中完成,因此来回调用工作表的次数要少得多。
如内联评论中所述,可以加快例程。
如果仍然太慢,根据工作表的大小,将整个工作表读入一个 VBA 数组可能是可行的;重复测试;将结果写回一个新数组并将其写到工作表中。 (不过,如果您的工作表太大,此方法可能 运行 内存不足)。
无论如何,我们需要一个 Class 模块,你必须重命名 cPhrases,以及一个 常规模块
Class 模块
Option Explicit
Private pPhrase As String
Private pCount As Long
Private pRowNums As Collection
Public Property Get Phrase() As String
Phrase = pPhrase
End Property
Public Property Let Phrase(Value As String)
pPhrase = Value
End Property
Public Property Get Count() As Long
Count = pCount
End Property
Public Property Let Count(Value As Long)
pCount = Value
End Property
Public Property Get RowNums() As Collection
Set RowNums = pRowNums
End Property
Public Function ADDRowNum(Value As Long)
pRowNums.Add Value
End Function
Private Sub Class_Initialize()
Set pRowNums = New Collection
End Sub
常规模块
Option Explicit
Sub RemoveDuplicateRows()
Dim wsSrc As Worksheet
Dim vSrc As Variant
Dim CP As cPhrases, colP As Collection, colRowNums As Collection
Dim I As Long, K As Long
Dim R As Range
'Data worksheet
Set wsSrc = Worksheets("sheet1")
'Read original data into VBA array
With wsSrc
vSrc = .Range(.Cells(1, "W"), .Cells(.Rows.Count, "W").End(xlUp))
End With
'Collect list of items, counts and row numbers to delete
'Collection object will --> error when trying to add
' duplicate key. Use that error to increment the count
Set colP = New Collection
On Error Resume Next
For I = 2 To UBound(vSrc, 1)
Set CP = New cPhrases
With CP
.Phrase = vSrc(I, 1)
.Count = 1
.ADDRowNum I
colP.Add CP, CStr(.Phrase)
Select Case Err.Number
Case 457 'duplicate
With colP(CStr(.Phrase))
.Count = .Count + 1
.ADDRowNum I
End With
Err.Clear
Case Is <> 0 'some other error. Stop to debug
Debug.Print "Error: " & Err.Number, Err.Description
Stop
End Select
End With
Next I
On Error GoTo 0
'Rows to be deleted
Set colRowNums = New Collection
For I = 1 To colP.Count
With colP(I)
Select Case .Count
Case 2
colRowNums.Add .RowNums(2)
Case Is > 2
For K = 1 To .RowNums.Count
colRowNums.Add .RowNums(K)
Next K
End Select
End With
Next I
'Revers Sort the collection of Row Numbers
'For speed, if necessary, could use
' faster sort routine
RevCollBubbleSort colRowNums
'Delete Rows
'For speed, could create Unions of up to 30 rows at a time
Application.ScreenUpdating = False
With wsSrc
For I = 1 To colRowNums.Count
.Rows(colRowNums(I)).Delete
Next I
End With
Application.ScreenUpdating = True
End Sub
'Could use faster sort routine if necessary
Sub RevCollBubbleSort(TempCol As Collection)
Dim I As Long
Dim NoExchanges As Boolean
' Loop until no more "exchanges" are made.
Do
NoExchanges = True
' Loop through each element in the array.
For I = 1 To TempCol.Count - 1
' If the element is less than the element
' following it, exchange the two elements.
If TempCol(I) < TempCol(I + 1) Then
NoExchanges = False
TempCol.Add TempCol(I), after:=I + 1
TempCol.Remove I
End If
Next I
Loop While Not (NoExchanges)
End Sub
虽然你的逻辑基本没问题,但方法不是最有效的。 AutoFilter Method can quickly remove all counts greater than 2 and the Range.RemoveDuplicates¹ method 随后可以快速删除 W 列中仍然包含重复值的行之一。
Dim r As Long, c As Long
With ws
If .AutoFilterMode Then .AutoFilterMode = False
r = .Cells.SpecialCells(xlLastCell).Row
c = Application.Max(52, .Cells.SpecialCells(xlLastCell).Column)
With .Range("A1", .Cells(r, c)) '.UsedRange
With .Columns(52)
If IsEmpty(.Cells(1, 1)) Then .Cells(1, 1) = "count"
With .Resize(.Rows.Count - 1, 1).Offset(1, 0)
.Cells.FormulaR1C1 = "=COUNTIF(C[-29], RC[-29])"
.Cells = .Cells.Value
End With
.AutoFilter field:=1, Criteria1:=">2"
With .Resize(.Rows.Count - 1, 1).Offset(1, 0)
If CBool(Application.Subtotal(103, .Cells)) Then
.SpecialCells(xlCellTypeVisible).EntireRow.Delete
End If
End With
.AutoFilter
End With
.RemoveDuplicates Columns:=23, Header:=xlYes
End With
End With
当您重写 AZ 列中的计数值时,您可能会将 3 计数重写为 2,等等
¹ Range.RemoveDuplicates method 从下往上删除重复行。
我想检查特定列 (W) 是否有重复项(出现次数存储在另一列 (AZ) 中),然后以这种方式删除所有行:
- 值在列中找到两次 - 仅删除包含该值的一行。
- 在列中找到值的次数更多 - 删除所有包含该值的行。
我的代码工作得很好,但有时它没有删除所有重复项。有什么改进的想法吗?
编辑:更新后的代码工作得很好,除了它总是遗漏一个重复项并且不删除它。
fin = ws.UsedRange.Rows.count
For i = 2 To fin
ws.Range("AZ" & i).value = Application.WorksheetFunction.CountIf(ws.Range("W2:W" & fin), ws.Range("W" & i))
Next i
For j = fin To 2 Step -1
If ws.Range("AZ" & j).value > 2 Then
ws.Range("AZ" & j).EntireRow.Delete
fin = ws.UsedRange.Rows.count
ElseIf ws.Range("AZ" & j).value = 2 Then
Set rng = Range("W:W").Find(Range("W" & j).value, , xlValues, xlWhole, , xlNext)
rngRow = rng.Row
If rngRow <> j Then
ws.Range("AZ" & rngRow) = "1"
ws.Range("AZ" & j).EntireRow.Delete
fin = ws.UsedRange.Rows.count
Else
MsgBox "Error at row " & rngRow
End If
End If
Next j
无需在第二部分使用低效的第二个循环,只需使用像这样的实时计数
fin = ws.UsedRange.Rows.count
For i = 2 To fin
ws.Range("AZ" & i).value = Application.WorksheetFunction.CountIf(ws.Range("W2:W" & fin), ws.Range("W" & i))
Next i
For j = fin To 2 Step -1
If ws.Range("AZ" & j).value > 2 OR Application.WorksheetFunction.CountIf(ws.Range("W2:W" & fin), ws.Range("W" & j)) = 2 Then
ws.Range("AZ" & j).EntireRow.Delete
End If
Next j
如果速度是个问题,这里有一个应该更快的方法,因为它创建了一个要删除的行的集合,然后将它们删除。由于除实际的行删除之外的所有操作都在 VBA 中完成,因此来回调用工作表的次数要少得多。
如内联评论中所述,可以加快例程。 如果仍然太慢,根据工作表的大小,将整个工作表读入一个 VBA 数组可能是可行的;重复测试;将结果写回一个新数组并将其写到工作表中。 (不过,如果您的工作表太大,此方法可能 运行 内存不足)。
无论如何,我们需要一个 Class 模块,你必须重命名 cPhrases,以及一个 常规模块
Class 模块
Option Explicit
Private pPhrase As String
Private pCount As Long
Private pRowNums As Collection
Public Property Get Phrase() As String
Phrase = pPhrase
End Property
Public Property Let Phrase(Value As String)
pPhrase = Value
End Property
Public Property Get Count() As Long
Count = pCount
End Property
Public Property Let Count(Value As Long)
pCount = Value
End Property
Public Property Get RowNums() As Collection
Set RowNums = pRowNums
End Property
Public Function ADDRowNum(Value As Long)
pRowNums.Add Value
End Function
Private Sub Class_Initialize()
Set pRowNums = New Collection
End Sub
常规模块
Option Explicit
Sub RemoveDuplicateRows()
Dim wsSrc As Worksheet
Dim vSrc As Variant
Dim CP As cPhrases, colP As Collection, colRowNums As Collection
Dim I As Long, K As Long
Dim R As Range
'Data worksheet
Set wsSrc = Worksheets("sheet1")
'Read original data into VBA array
With wsSrc
vSrc = .Range(.Cells(1, "W"), .Cells(.Rows.Count, "W").End(xlUp))
End With
'Collect list of items, counts and row numbers to delete
'Collection object will --> error when trying to add
' duplicate key. Use that error to increment the count
Set colP = New Collection
On Error Resume Next
For I = 2 To UBound(vSrc, 1)
Set CP = New cPhrases
With CP
.Phrase = vSrc(I, 1)
.Count = 1
.ADDRowNum I
colP.Add CP, CStr(.Phrase)
Select Case Err.Number
Case 457 'duplicate
With colP(CStr(.Phrase))
.Count = .Count + 1
.ADDRowNum I
End With
Err.Clear
Case Is <> 0 'some other error. Stop to debug
Debug.Print "Error: " & Err.Number, Err.Description
Stop
End Select
End With
Next I
On Error GoTo 0
'Rows to be deleted
Set colRowNums = New Collection
For I = 1 To colP.Count
With colP(I)
Select Case .Count
Case 2
colRowNums.Add .RowNums(2)
Case Is > 2
For K = 1 To .RowNums.Count
colRowNums.Add .RowNums(K)
Next K
End Select
End With
Next I
'Revers Sort the collection of Row Numbers
'For speed, if necessary, could use
' faster sort routine
RevCollBubbleSort colRowNums
'Delete Rows
'For speed, could create Unions of up to 30 rows at a time
Application.ScreenUpdating = False
With wsSrc
For I = 1 To colRowNums.Count
.Rows(colRowNums(I)).Delete
Next I
End With
Application.ScreenUpdating = True
End Sub
'Could use faster sort routine if necessary
Sub RevCollBubbleSort(TempCol As Collection)
Dim I As Long
Dim NoExchanges As Boolean
' Loop until no more "exchanges" are made.
Do
NoExchanges = True
' Loop through each element in the array.
For I = 1 To TempCol.Count - 1
' If the element is less than the element
' following it, exchange the two elements.
If TempCol(I) < TempCol(I + 1) Then
NoExchanges = False
TempCol.Add TempCol(I), after:=I + 1
TempCol.Remove I
End If
Next I
Loop While Not (NoExchanges)
End Sub
虽然你的逻辑基本没问题,但方法不是最有效的。 AutoFilter Method can quickly remove all counts greater than 2 and the Range.RemoveDuplicates¹ method 随后可以快速删除 W 列中仍然包含重复值的行之一。
Dim r As Long, c As Long
With ws
If .AutoFilterMode Then .AutoFilterMode = False
r = .Cells.SpecialCells(xlLastCell).Row
c = Application.Max(52, .Cells.SpecialCells(xlLastCell).Column)
With .Range("A1", .Cells(r, c)) '.UsedRange
With .Columns(52)
If IsEmpty(.Cells(1, 1)) Then .Cells(1, 1) = "count"
With .Resize(.Rows.Count - 1, 1).Offset(1, 0)
.Cells.FormulaR1C1 = "=COUNTIF(C[-29], RC[-29])"
.Cells = .Cells.Value
End With
.AutoFilter field:=1, Criteria1:=">2"
With .Resize(.Rows.Count - 1, 1).Offset(1, 0)
If CBool(Application.Subtotal(103, .Cells)) Then
.SpecialCells(xlCellTypeVisible).EntireRow.Delete
End If
End With
.AutoFilter
End With
.RemoveDuplicates Columns:=23, Header:=xlYes
End With
End With
当您重写 AZ 列中的计数值时,您可能会将 3 计数重写为 2,等等
¹ Range.RemoveDuplicates method 从下往上删除重复行。