确定一个词是否在匹配项的 125 个词以内
Determine if a word is within 125 words of a match
我想遍历一个文档,对于每个单词,看看是否在 250 个单词(后面 125 个,前面 125 个)之内匹配。
如果有匹配项,请突出显示。某些词被排除在外。这些存储在字典中。
为了测试我正在使用的循环,
For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
Debug.Print wrd & "----" & wrd.Start
Next wrd
Next para
问题:
“我讨厌去灵界旅行”这句话中的“世界”,在找7的时候打印32。
我想做这样的事情:
If wrd < 125 Then
Set wrdRng = ActiveDocument.Range(Start:=wrd - 125, End:=ActiveDocument.Words(wrd + 125).End)
Else
Set wrdRng = ActiveDocument.Range(Start:=0, End:=ActiveDocument.Words(250 - wrd).End)
End if
编辑:
我目前使用的代码在大约 13 分钟内完成一个 50,000 字文档的循环。那太长了。谁有更好的选择?
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String
StartTime = Timer
For Each Para In ActiveDocument.Paragraphs
For Each wrd In Para.Range.Words
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<(McKnight)*>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWildcards = True
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
i = i + 1
.Words.First.HighlightColorIndex = wdBrightGreen
.Words.Last.HighlightColorIndex = wdBrightGreen
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
Debug.Print wrd
Next wrd
Next Para
MinutesElapsed = Format((Timer - StartTime) / 86400, "hh:mm:ss")
MsgBox "This code ran successfully in " & MinutesElapsed & " minutes", vbInformation
End Sub
编辑:
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String
StartTime = Timer
For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<(wrd)*>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWildcards = True
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
i = i + 1
.Words.First.HighlightColorIndex = wdBrightGreen
.Words.Last.HighlightColorIndex = wdBrightGreen
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
'Debug.Print wrd
Next wrd
Next para
MinutesElapsed = Format((Timer - StartTime) / 86400, "hh:mm:ss")
MsgBox "This code ran successfully in " & MinutesElapsed & " minutes", vbInformation
End Sub
尝试以下操作。除此之外,它还允许您指定要忽略的词(例如介词、冠词等)。此外,不同的突出显示用于识别给定单词上的所有 'hits'。进度报告显示在状态栏上。在我的笔记本电脑上,一份 50,000 字 'lorem' 的文档大约需要 6:40。
Option Explicit
Dim ArrOut() As String
Sub Demo()
' Turn Off Screen Updating
Application.ScreenUpdating = False
Dim eTime As Single
' Start Timing
eTime = Timer
Dim wdDoc As Document, StrFnd As String, StrTmp As String, Rng As Range
Dim SBar As Boolean, bTrk As Boolean, h As Long, i As Long, j As Long
' Store current Status Bar status, then switch on
SBar = Application.DisplayStatusBar
Application.DisplayStatusBar = True
Set wdDoc = ActiveDocument
With wdDoc
' Store current Track Changes status, then switch off
bTrk = .TrackRevisions: .TrackRevisions = False
'Display status
Application.StatusBar = "Building word list"
'Compile the Find list
Call BuildWordList(.Range.Text)
With .Range.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = ""
.Replacement.Text = ""
.Format = False
.MatchCase = False
.MatchWholeWord = True
.Wrap = wdFindStop
.Execute
End With
End With
'Process all words in the concordance
For i = 0 To UBound(ArrOut())
StrFnd = ArrOut(i)
h = i Mod 14
If h < 6 Then
h = h + 2
Else
h = h + 3
End If
'Display current word
Application.StatusBar = "Processing: " & StrFnd
'Use wildcards, if possible, for extra speed
If Len(StrFnd) < 4 Then
StrTmp = ""
For j = 1 To Len(StrFnd)
StrTmp = StrTmp & "[" & UCase(Mid(StrFnd, j, 1)) & Mid(StrFnd, j, 1) & "]"
Next
StrFnd = StrTmp
With wdDoc.Range
With .Find
.MatchWildcards = True
.Text = "<(" & StrFnd & ")>*<(" & StrFnd & ")>"
.Forward = True
.Wrap = wdFindStop
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
.Words.Last.HighlightColorIndex = h
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Else
With wdDoc.Range
With .Find
.MatchWildcards = False
.Text = StrFnd
.Forward = True
.Wrap = wdFindStop
.Execute
End With
Set Rng = .Duplicate
Do While .Find.Execute
Rng.End = .Duplicate.End
With Rng
If .ComputeStatistics(wdStatisticWords) < 100 Then
If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
.Words.Last.HighlightColorIndex = h
End If
End With
Set Rng = .Duplicate
.Collapse wdCollapseEnd
Loop
End With
End If
DoEvents
Next
' Restore original Track Changes status
wdDoc.TrackRevisions = bTrk
' Clear the Status Bar
Application.StatusBar = False
' Restore original Status Bar status
Application.DisplayStatusBar = SBar
' Restore Screen Updating
Application.ScreenUpdating = True
' Calculate elapsed time
eTime = (Timer - eTime + 86400) Mod 86400 ' Just in case execution time spans midnight
MsgBox "Execution took " & Format(eTime / 86400, "hh:mm:ss") & " to process"
End Sub
Sub BuildWordList(StrIn As String)
Dim StrFnd As String, i As Long, j As Long, k As Long
'Define the exlusions list
Const StrExcl As String = "a,am,and,are,as,at,be,but,by,can,cm,did,do,does,eg," & _
"en,eq,etc,for,get,go,got,has,have,he,her,him,how,i,ie,if,in,into,is," & _
"it,its,me,mi,mm,my,na,nb,no,not,of,off,ok,on,one,or,our,out,re,she," & _
"so,the,their,them,they,t,to,was,we,were,who,will,would,yd,you,your"
'Strip out unwanted characters
For i = 1 To 255
Select Case i
Case 1 To 31, 33 To 64, 91 To 96, 123 To 144, 147 To 191, 247
Do While InStr(StrIn, Chr(i)) > 0
StrIn = Replace(StrIn, Chr(i), " ")
Loop
End Select
Next
'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(145), "'"), Chr(146), "'"), "' ", " "), " '", " ")
'Convert to lowercase
StrIn = " " & LCase(StrIn) & " "
'Process the exclusions list
For i = 0 To UBound(Split(StrExcl, ","))
StrFnd = " " & Split(StrExcl, ",")(i) & " "
Do While InStr(StrIn, StrFnd) > 0
StrIn = Replace(StrIn, " " & Split(StrExcl, ",")(i) & " ", " ")
Loop
Next
'Clean up any duplicate spaces
Do While InStr(StrIn, " ") > 0
StrIn = Replace(StrIn, " ", " ")
Loop
i = 0
Do While UBound(Split(StrIn, " ")) > 1
StrFnd = " " & Split(StrIn, " ")(1) & " ": j = Len(StrIn)
'Find how many occurences of each word there are in the document
StrIn = Replace(StrIn, StrFnd, " ")
k = (j - Len(StrIn)) / (Len(StrFnd) - 1)
'If there's more than one occurence, add the word to our Find list
If k > 1 Then
ReDim Preserve ArrOut(i)
ArrOut(i) = Trim(StrFnd)
i = i + 1
End If
Loop
WordBasic.SortArray ArrOut()
End Sub
我想遍历一个文档,对于每个单词,看看是否在 250 个单词(后面 125 个,前面 125 个)之内匹配。
如果有匹配项,请突出显示。某些词被排除在外。这些存储在字典中。
为了测试我正在使用的循环,
For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
Debug.Print wrd & "----" & wrd.Start
Next wrd
Next para
问题:
“我讨厌去灵界旅行”这句话中的“世界”,在找7的时候打印32。
我想做这样的事情:
If wrd < 125 Then
Set wrdRng = ActiveDocument.Range(Start:=wrd - 125, End:=ActiveDocument.Words(wrd + 125).End)
Else
Set wrdRng = ActiveDocument.Range(Start:=0, End:=ActiveDocument.Words(250 - wrd).End)
End if
编辑:
我目前使用的代码在大约 13 分钟内完成一个 50,000 字文档的循环。那太长了。谁有更好的选择?
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String
StartTime = Timer
For Each Para In ActiveDocument.Paragraphs
For Each wrd In Para.Range.Words
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<(McKnight)*>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWildcards = True
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
i = i + 1
.Words.First.HighlightColorIndex = wdBrightGreen
.Words.Last.HighlightColorIndex = wdBrightGreen
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
Debug.Print wrd
Next wrd
Next Para
MinutesElapsed = Format((Timer - StartTime) / 86400, "hh:mm:ss")
MsgBox "This code ran successfully in " & MinutesElapsed & " minutes", vbInformation
End Sub
编辑:
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String
StartTime = Timer
For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<(wrd)*>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWildcards = True
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
i = i + 1
.Words.First.HighlightColorIndex = wdBrightGreen
.Words.Last.HighlightColorIndex = wdBrightGreen
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
'Debug.Print wrd
Next wrd
Next para
MinutesElapsed = Format((Timer - StartTime) / 86400, "hh:mm:ss")
MsgBox "This code ran successfully in " & MinutesElapsed & " minutes", vbInformation
End Sub
尝试以下操作。除此之外,它还允许您指定要忽略的词(例如介词、冠词等)。此外,不同的突出显示用于识别给定单词上的所有 'hits'。进度报告显示在状态栏上。在我的笔记本电脑上,一份 50,000 字 'lorem' 的文档大约需要 6:40。
Option Explicit
Dim ArrOut() As String
Sub Demo()
' Turn Off Screen Updating
Application.ScreenUpdating = False
Dim eTime As Single
' Start Timing
eTime = Timer
Dim wdDoc As Document, StrFnd As String, StrTmp As String, Rng As Range
Dim SBar As Boolean, bTrk As Boolean, h As Long, i As Long, j As Long
' Store current Status Bar status, then switch on
SBar = Application.DisplayStatusBar
Application.DisplayStatusBar = True
Set wdDoc = ActiveDocument
With wdDoc
' Store current Track Changes status, then switch off
bTrk = .TrackRevisions: .TrackRevisions = False
'Display status
Application.StatusBar = "Building word list"
'Compile the Find list
Call BuildWordList(.Range.Text)
With .Range.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = ""
.Replacement.Text = ""
.Format = False
.MatchCase = False
.MatchWholeWord = True
.Wrap = wdFindStop
.Execute
End With
End With
'Process all words in the concordance
For i = 0 To UBound(ArrOut())
StrFnd = ArrOut(i)
h = i Mod 14
If h < 6 Then
h = h + 2
Else
h = h + 3
End If
'Display current word
Application.StatusBar = "Processing: " & StrFnd
'Use wildcards, if possible, for extra speed
If Len(StrFnd) < 4 Then
StrTmp = ""
For j = 1 To Len(StrFnd)
StrTmp = StrTmp & "[" & UCase(Mid(StrFnd, j, 1)) & Mid(StrFnd, j, 1) & "]"
Next
StrFnd = StrTmp
With wdDoc.Range
With .Find
.MatchWildcards = True
.Text = "<(" & StrFnd & ")>*<(" & StrFnd & ")>"
.Forward = True
.Wrap = wdFindStop
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
.Words.Last.HighlightColorIndex = h
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Else
With wdDoc.Range
With .Find
.MatchWildcards = False
.Text = StrFnd
.Forward = True
.Wrap = wdFindStop
.Execute
End With
Set Rng = .Duplicate
Do While .Find.Execute
Rng.End = .Duplicate.End
With Rng
If .ComputeStatistics(wdStatisticWords) < 100 Then
If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
.Words.Last.HighlightColorIndex = h
End If
End With
Set Rng = .Duplicate
.Collapse wdCollapseEnd
Loop
End With
End If
DoEvents
Next
' Restore original Track Changes status
wdDoc.TrackRevisions = bTrk
' Clear the Status Bar
Application.StatusBar = False
' Restore original Status Bar status
Application.DisplayStatusBar = SBar
' Restore Screen Updating
Application.ScreenUpdating = True
' Calculate elapsed time
eTime = (Timer - eTime + 86400) Mod 86400 ' Just in case execution time spans midnight
MsgBox "Execution took " & Format(eTime / 86400, "hh:mm:ss") & " to process"
End Sub
Sub BuildWordList(StrIn As String)
Dim StrFnd As String, i As Long, j As Long, k As Long
'Define the exlusions list
Const StrExcl As String = "a,am,and,are,as,at,be,but,by,can,cm,did,do,does,eg," & _
"en,eq,etc,for,get,go,got,has,have,he,her,him,how,i,ie,if,in,into,is," & _
"it,its,me,mi,mm,my,na,nb,no,not,of,off,ok,on,one,or,our,out,re,she," & _
"so,the,their,them,they,t,to,was,we,were,who,will,would,yd,you,your"
'Strip out unwanted characters
For i = 1 To 255
Select Case i
Case 1 To 31, 33 To 64, 91 To 96, 123 To 144, 147 To 191, 247
Do While InStr(StrIn, Chr(i)) > 0
StrIn = Replace(StrIn, Chr(i), " ")
Loop
End Select
Next
'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(145), "'"), Chr(146), "'"), "' ", " "), " '", " ")
'Convert to lowercase
StrIn = " " & LCase(StrIn) & " "
'Process the exclusions list
For i = 0 To UBound(Split(StrExcl, ","))
StrFnd = " " & Split(StrExcl, ",")(i) & " "
Do While InStr(StrIn, StrFnd) > 0
StrIn = Replace(StrIn, " " & Split(StrExcl, ",")(i) & " ", " ")
Loop
Next
'Clean up any duplicate spaces
Do While InStr(StrIn, " ") > 0
StrIn = Replace(StrIn, " ", " ")
Loop
i = 0
Do While UBound(Split(StrIn, " ")) > 1
StrFnd = " " & Split(StrIn, " ")(1) & " ": j = Len(StrIn)
'Find how many occurences of each word there are in the document
StrIn = Replace(StrIn, StrFnd, " ")
k = (j - Len(StrIn)) / (Len(StrFnd) - 1)
'If there's more than one occurence, add the word to our Find list
If k > 1 Then
ReDim Preserve ArrOut(i)
ArrOut(i) = Trim(StrFnd)
i = i + 1
End If
Loop
WordBasic.SortArray ArrOut()
End Sub