I want to loop through a document, and for each word, see if there is a match within 250 words (125 behind and 125 ahead).
If there is a match(s), highlight it. Certain words are excluded. These are stored in a dictionary.
To test the loop I am using,
For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
Debug.Print wrd & "----" & wrd.Start
Next wrd
Next para
The problem:
"World" in the sentence "I hate traveling to the spirit world", prints 32 when I am looking for 7.
I want to do something like:
If wrd < 125 Then
Set wrdRng = ActiveDocument.Range(Start:=wrd - 125, End:=ActiveDocument.Words(wrd + 125).End)
Else
Set wrdRng = ActiveDocument.Range(Start:=0, End:=ActiveDocument.Words(250 - wrd).End)
End if
Edit:
The current code I'm using completes a loop on a 50,000 word document in about 13 minutes. That is entirely too long. Anyone have a better alternative?
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String
StartTime = Timer
For Each Para In ActiveDocument.Paragraphs
For Each wrd In Para.Range.Words
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<(McKnight)*\1>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWildcards = True
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
i = i + 1
.Words.First.HighlightColorIndex = wdBrightGreen
.Words.Last.HighlightColorIndex = wdBrightGreen
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
Debug.Print wrd
Next wrd
Next Para
MinutesElapsed = Format((Timer - StartTime) / 86400, "hh:mm:ss")
MsgBox "This code ran successfully in " & MinutesElapsed & " minutes", vbInformation
End Sub
EDIT:
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String
StartTime = Timer
For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<(wrd)*\1>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWildcards = True
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
i = i + 1
.Words.First.HighlightColorIndex = wdBrightGreen
.Words.Last.HighlightColorIndex = wdBrightGreen
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
'Debug.Print wrd
Next wrd
Next para
MinutesElapsed = Format((Timer - StartTime) / 86400, "hh:mm:ss")
MsgBox "This code ran successfully in " & MinutesElapsed & " minutes", vbInformation
End Sub
Try the following. Amongst other things, it allows you to specify words to ignore (e.g. prepositions, articles, etc.). Additionally different highlights are used to identify all 'hits' on a given word. A progress report is given on the status bar. On my laptop, it takes about 6:40 for a 50,000 word 'lorem' document.
Option Explicit
Dim ArrOut() As String
Sub Demo()
' Turn Off Screen Updating
Application.ScreenUpdating = False
Dim eTime As Single
' Start Timing
eTime = Timer
Dim wdDoc As Document, StrFnd As String, StrTmp As String, Rng As Range
Dim SBar As Boolean, bTrk As Boolean, h As Long, i As Long, j As Long
' Store current Status Bar status, then switch on
SBar = Application.DisplayStatusBar
Application.DisplayStatusBar = True
Set wdDoc = ActiveDocument
With wdDoc
' Store current Track Changes status, then switch off
bTrk = .TrackRevisions: .TrackRevisions = False
'Display status
Application.StatusBar = "Building word list"
'Compile the Find list
Call BuildWordList(.Range.Text)
With .Range.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = ""
.Replacement.Text = ""
.Format = False
.MatchCase = False
.MatchWholeWord = True
.Wrap = wdFindStop
.Execute
End With
End With
'Process all words in the concordance
For i = 0 To UBound(ArrOut())
StrFnd = ArrOut(i)
h = i Mod 14
If h < 6 Then
h = h + 2
Else
h = h + 3
End If
'Display current word
Application.StatusBar = "Processing: " & StrFnd
'Use wildcards, if possible, for extra speed
If Len(StrFnd) < 4 Then
StrTmp = ""
For j = 1 To Len(StrFnd)
StrTmp = StrTmp & "[" & UCase(Mid(StrFnd, j, 1)) & Mid(StrFnd, j, 1) & "]"
Next
StrFnd = StrTmp
With wdDoc.Range
With .Find
.MatchWildcards = True
.Text = "<(" & StrFnd & ")>*<(" & StrFnd & ")>"
.Forward = True
.Wrap = wdFindStop
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
.Words.Last.HighlightColorIndex = h
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Else
With wdDoc.Range
With .Find
.MatchWildcards = False
.Text = StrFnd
.Forward = True
.Wrap = wdFindStop
.Execute
End With
Set Rng = .Duplicate
Do While .Find.Execute
Rng.End = .Duplicate.End
With Rng
If .ComputeStatistics(wdStatisticWords) < 100 Then
If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
.Words.Last.HighlightColorIndex = h
End If
End With
Set Rng = .Duplicate
.Collapse wdCollapseEnd
Loop
End With
End If
DoEvents
Next
' Restore original Track Changes status
wdDoc.TrackRevisions = bTrk
' Clear the Status Bar
Application.StatusBar = False
' Restore original Status Bar status
Application.DisplayStatusBar = SBar
' Restore Screen Updating
Application.ScreenUpdating = True
' Calculate elapsed time
eTime = (Timer - eTime + 86400) Mod 86400 ' Just in case execution time spans midnight
MsgBox "Execution took " & Format(eTime / 86400, "hh:mm:ss") & " to process"
End Sub
Sub BuildWordList(StrIn As String)
Dim StrFnd As String, i As Long, j As Long, k As Long
'Define the exlusions list
Const StrExcl As String = "a,am,and,are,as,at,be,but,by,can,cm,did,do,does,eg," & _
"en,eq,etc,for,get,go,got,has,have,he,her,him,how,i,ie,if,in,into,is," & _
"it,its,me,mi,mm,my,na,nb,no,not,of,off,ok,on,one,or,our,out,re,she," & _
"so,the,their,them,they,t,to,was,we,were,who,will,would,yd,you,your"
'Strip out unwanted characters
For i = 1 To 255
Select Case i
Case 1 To 31, 33 To 64, 91 To 96, 123 To 144, 147 To 191, 247
Do While InStr(StrIn, Chr(i)) > 0
StrIn = Replace(StrIn, Chr(i), " ")
Loop
End Select
Next
'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(145), "'"), Chr(146), "'"), "' ", " "), " '", " ")
'Convert to lowercase
StrIn = " " & LCase(StrIn) & " "
'Process the exclusions list
For i = 0 To UBound(Split(StrExcl, ","))
StrFnd = " " & Split(StrExcl, ",")(i) & " "
Do While InStr(StrIn, StrFnd) > 0
StrIn = Replace(StrIn, " " & Split(StrExcl, ",")(i) & " ", " ")
Loop
Next
'Clean up any duplicate spaces
Do While InStr(StrIn, " ") > 0
StrIn = Replace(StrIn, " ", " ")
Loop
i = 0
Do While UBound(Split(StrIn, " ")) > 1
StrFnd = " " & Split(StrIn, " ")(1) & " ": j = Len(StrIn)
'Find how many occurences of each word there are in the document
StrIn = Replace(StrIn, StrFnd, " ")
k = (j - Len(StrIn)) / (Len(StrFnd) - 1)
'If there's more than one occurence, add the word to our Find list
If k > 1 Then
ReDim Preserve ArrOut(i)
ArrOut(i) = Trim(StrFnd)
i = i + 1
End If
Loop
WordBasic.SortArray ArrOut()
End Sub