Summary
Installation
vba ms-word
Written: Jun-2023

Steps to create a VBA macro to red-highlight repetitive words

The following script can be added to your MS-Word VBA environment in order to quickly scan and highlight the overuse of [English] words in the text. The logic will look for similar words within a 50 word radius to each word, and if it finds repetition it will mark them with a red-background highlight.

The logic will attempt to normalize all words when it does this - so "Fund", "Funding" and "Funds" will all be seen as the same word.

Once installed, to run the red-highlight, simply press AltF11 and select to run the redHighlightAdd function.

To undo the red-highlights, run the redHighlightsRemove function.

Installation

In MS-Word

  1. Press AltF11 to open the VBA Editor.
  2. Insert a new module by clicking Insert -> Module.
  3. Copy and paste the code below into the module window.
vba ::  redHighlightAdd
Option Compare Text
Sub redHighlightAdd()
    ' Create the dictionary object for storing normalized words
    Dim normalizedWordsDict As Object
    Dim wordList() As String
    Dim wordList0() As String
    Dim findRange As Range
    Dim t As String
    Dim ignoredWords() As String
    
    Application.ScreenUpdating = False
    
    Set normalizedWordsDict = CreateObject("Scripting.Dictionary")
    
    
    Application.StatusBar = "Adding Red highlights..."
    Set findRange = ActiveDocument.Content
    
    t = getVisibleCharacters(ActiveDocument.Range.text)
    wordList0 = Split(t, " ")
    
    
    ReDim wordList(LBound(wordList0) To UBound(wordList0))
    'For i = LBound(wordList0) To UBound(wordList0)
    '    ' Debug.Print wordList0(i)
    '    wordList(i) = GetNormalizedWord(wordList0(i))
    '    Debug.Print wordList0(i) & "=" & wordList(i)
    'Next i
    
    Dim reportedWords As New Collection
    Dim currentWord As String

    For i = LBound(wordList0) To UBound(wordList0)
        
        currentWord = wordList0(i)
        wordList(i) = GetNormalizedWord(currentWord)
        
        ' Check if the current word has been reported before
        Dim alreadyReported As Boolean
        alreadyReported = False
        On Error Resume Next
        
        reportedWords.Add currentWord, currentWord
        If Err.Number > 0 Then
            alreadyReported = True
        End If
        
        On Error GoTo 0
        
        If Not alreadyReported Then
            Debug.Print "[" & currentWord & "]=[" & wordList(i) & "]"
        Else
            ' Debug.Print "Skipped [" & currentWord & "]"
        End If
        x = 0
        
    Next i

    ignoredWords = Split("this,that,than,when,them,there,their,they,they're", ",")
    
    
    redHighlightsRemove
    
    Application.StatusBar = "Scanning words..."
    For i = 0 To UBound(wordList)
    
        ThisWord = wordList(i)
        
        'Application.ScreenUpdating = True
        Application.StatusBar = "Processing word " & (i + 1) & " [" & wordList(i) & "] of " & UBound(wordList) + 1
        'Application.ScreenUpdating = False
        
        If (Len(ThisWord) > 3 And Not IsIgnoredWord(ThisWord, ignoredWords)) Then

            normalizedWordI = ThisWord
            Debug.Print i & ": Considering " & normalizedWordI
            
            ' Loop through the next 100 words
            For j = i + 1 To Min(i + 50, UBound(wordList))
                ' Update the status bar with the progress
                ' Application.StatusBar = "Processing word " & (i + 1) & "." & j & " [" & wordList(i) & "][" & wordList(j) & "] of " & UBound(wordList) + 1
                
                ' Check if the word is in the ignored list
                If Not IsIgnoredWord(wordList(j), ignoredWords) Then
                    ' Normalize the words using lemmatization if they meet the length requirement
                    
                    normalizedWordJ = wordList(j)
                    
                    ' Check if the normalized words match (case-insensitive comparison)
                    If StrComp(normalizedWordI, normalizedWordJ, vbTextCompare) = 0 Then
                        ' Find and highlight the repeated word
                        ' Go to the specific word pointed to by j
                        Debug.Print
                        Debug.Print "Highlighted " & ThisWord
                        highlightWord i, wordList0
                        highlightWord j, wordList0
                        Debug.Print
                    End If
                End If
            Next j
        End If
    Next i
    ' ...
    
    ' Clear the status bar
    Application.StatusBar = "Highlighting finished"
    Debug.Print "Highlighting finished"
    Application.ScreenUpdating = True
End Sub


Sub highlightWord(ByVal wordIndex As Long, wordList() As String)
    ' Declare and set the findRange object
    'Dim findRange As Range
    ' Dim wordList() As String
    Dim t As String
    Dim selectedRange As Range
    Dim x As Integer, x0 As Integer, w As Long, safe As Long, tracer As String, word As String, finished As Boolean
    
    'Set findRange = ActiveDocument.Content
    
    ' Debug.Print findRange.Text
    
    ' t = getVisibleCharacters(ActiveDocument.Range.text)
    ' wordList = Split(t, " ")
    finished = False
    w = 0
    x = 0
    safe = 500000
    
    Do Until finished Or safe = 0
       word = wordList(w)
       x0 = x
       x = x + Len(word) + 1
       tracer = "  " & w & " Stepping over [" & word & "] from " & x0 & " to " & x
       If word = "" Then
          'wordIndex = wordIndex + 1
       End If
       If (w >= wordIndex And word <> "") Then
          finished = True
       End If
       w = w + 1
       safe = safe - 1
       'Debug.Print tracer & " ...[" & word & "] " & safe
    Loop
    ' ActiveDocument.Range(Start:=x, End:=x + Len(wordList(wordIndex))).Select
    
    
    ' ShowCharacterASCII "a", word
    Set selectedRange = ActiveDocument.Range(Start:=x0, End:=x - 1)
    selectedRange.Select
    selectedRange.HighlightColorIndex = wdRed
        
    'ShowCharacterASCII "b", selectedRange.text
    Debug.Print "Highlighting word " & wordIndex & " [" & word & "] at " & x & " and " & (x + Len(word)) & "  = [" & selectedRange.text & "]."
    ' selectedRange.Shading.BackgroundPatternColorIndex = wdRed
End Sub
Function getVisibleCharacters(ByVal text As String) As String
    Dim regex As Object
    Set regex = CreateObject("VBScript.RegExp")
    ' Use regular expression to remove non-printable characters
    Dim strPattern As String: strPattern = "[^a-zA-Z0-9'ā€™-]" ' The regex pattern to find special characters

    With regex
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = strPattern
    End With

    ' regex.Pattern = "[^ws!@#$%^&*()-+=<>?/.,;:'""[{]}|_`~" & vbCrLf & vbLf & "]"
    getVisibleCharacters = regex.Replace(text, " ")
End Function
Sub ShowCharacterASCII(ByVal prefix As String, ByVal text As String)
    Dim i As Integer
    For i = 1 To Len(text)
        Debug.Print prefix & " : " & Mid(text, i, 1) & ": " & Asc(Mid(text, i, 1))
    Next i
    Debug.Print
End Sub
Function getReplaceMultipleSpaces(ByVal originalText As String) As String
    Dim modifiedText As String
    Dim regex As Object
    Set regex = CreateObject("VBScript.RegExp")
    regex.Pattern = "s+"
    modifiedText = regex.Replace(originalText, " ")
    getReplaceMultipleSpaces = modifiedText
End Function
Function GetWordAtPosition(ByVal wordIndex As Long) As String
    Dim wordList() As String
    ' wordList = Split(ActiveDocument.Content.Text, " ")
    Dim t As String
    t = getRemovePunctuation(ActiveDocument.Range.text)
    wordList = Split(t, " ")
    
    If wordIndex > 0 And wordIndex <= UBound(wordList) + 1 Then
        GetWordAtPosition = Trim(wordList(wordIndex - 1))
    Else
        GetWordAtPosition = ""
    End If
End Function


Sub redHighlightsRemove()
    Debug.Print "redHighlightsRemove"
    
    Dim doc As Document
    Dim counter As Long
    Dim iterationCount As Long
    Dim maxIterations As Long
    Dim currentPosition As Long
    Dim docLength As Long
    Dim characterRange As Range
    Dim hasRedHighlighting As Boolean
    
    Set doc = ActiveDocument
    
    Application.StatusBar = "Removing Red highlights..."
    counter = 0
    iterationCount = 0
    maxIterations = 10000000
    currentPosition = 0
    docLength = doc.Range.End
    
    Set characterRange = doc.Content
    
    Const redRGB As Long = 255 ' R = 255
    Const greenRGB As Long = 0 ' G = 0
    Const blueRGB As Long = 0 ' B = 0
    
    currentPosition = 0
    Do While currentPosition < docLength
        characterRange.Start = currentPosition ' Set the start position of the character range
        characterRange.End = currentPosition + 1 ' Set the end position of the character range
        
        ' Debug.Print "Character: " & currentPosition & " = " & characterRange.text & " = " & characterRange.Font.Shading.BackgroundPatternColor & " = " & characterRange.HighlightColorIndex
        
        If characterRange.HighlightColorIndex = wdRed Then
            characterRange.HighlightColorIndex = wdNoHighlight
            counter = counter + 1
        End If
        
        iterationCount = iterationCount + 1
        If iterationCount >= maxIterations Then
            Exit Do
        End If
        
        currentPosition = currentPosition + 1
    Loop
    
    Application.StatusBar = "Red highlights removed: " & counter
    Debug.Print "redHighlightsRemove done"
End Sub
Function ContainsHyphen(word As String) As Boolean
    ContainsHyphen = (InStr(word, "-") > 0)
End Function
Function Min(a As Long, b As Long) As Long
    If a < b Then
        Min = a
    Else
        Min = b
    End If
End Function
Function Max(a As Long, b As Long) As Long
    If a > b Then
        Max = a
    Else
        Max = b
    End If
End Function











Function IsIgnoredWord(ByVal word As String, ignoredWords() As String) As Boolean
    Dim i As Long
    For i = LBound(ignoredWords) To UBound(ignoredWords)
        If LCase(Trim(word)) = LCase(Trim(ignoredWords(i))) Then
            IsIgnoredWord = True
            Exit Function
        End If
    Next i
    IsIgnoredWord = False
End Function


Sub TestHighlight()
    Dim w As Integer
    w = 4
        redHighlightsRemove
    Do While w < 30
        Debug.Print "-- " & w
        highlightWord w
        w = w + 1
    Loop
    Debug.Print "Done"
End Sub



You also need the following word-normalizer that will TRY TO change plurals and variants into their basic versions so that those are considered the same word, e.g. "Reading", "Reader", "Reads" and "Read" are all considered the same:

vba ::  get normlized versions of words
Function GetNormalizedWord(ByVal word As String) As String
    ' Remove common word endings
    word = LCase(word)
    x = Len(word)
    
    If Len(word) > 3 Then
        If Right(word, 3) = "ies" Then
            If Right(word, 5) = "chies" Then
                word = Left(word, x - 3) & "y"
            Else
                word = Left(word, x - 3)
            End If
        ElseIf Right(word, 2) = "ā€™s" Then
            word = Left(word, x - 2)
            
        ElseIf Right(word, 2) = "sā€™" Then
            word = Left(word, x - 1)
            
        ElseIf Right(word, 3) = "ous" Then
            word = Left(word, x - 2) & "n" ' e.g. reglious
        
        ElseIf Right(word, 1) = "s" Then
            If Right(word, 3) = "hes" Then
                word = Left(word, x - 2)
            ElseIf word = "jesus" Then
                ' skip
            Else
                word = Left(word, x - 1)
            End If
        End If
        
        
        x = Len(word)
            
        If Right(word, 2) = "ed" Then
            If Right(word, 3) = "ied" Then
                word = Left(word, x - 3) & "y"
                
            ElseIf Right(word, 3) = "bed" Then
                word = Left(word, x - 1)
                
            ElseIf Right(word, 3) = "red" Then
                If Right(word, 4) = "ered" Then
                    word = Left(word, x - 2)
                Else
                    word = Left(word, x - 1)
                End If
                
            ElseIf Right(word, 5) = "rated" Then
                word = Left(word, x - 1)
            Else
                word = Left(word, x - 2)
            End If
            
        ElseIf Right(word, 6) = "tional" Then ' e.g. traditional
            word = Left(word, x - 2)
            
        ElseIf Right(word, 3) = "yal" Then ' e.g. portrayal
            word = Left(word, x - 2)
        
        ElseIf Right(word, 2) = "ly" Then
            word = Left(word, x - 2)
            
        ElseIf Right(word, 3) = "ing" Then
            If Right(word, 4) = "sing" Then ' e.g. symbolising
                word = Left(word, x - 3) & "e"
            ElseIf Right(word, 4) = "zing" Then ' e.g. symbolizing
                word = Left(word, x - 3) & "e"
            ElseIf Right(word, 6) = "hering" Then ' e.g. adhering
                word = Left(word, x - 3) & "e"
            Else
                word = Left(word, x - 3)
            End If
            
        ElseIf Right(word, 3) = "ized" Then
            word = Left(word, x - 3)
            
        ElseIf Right(word, 3) = "ism" Then
            word = Left(word, x - 3)
            
        ElseIf Right(word, 3) = "ion" Then
            If Right(word, 7) = "uration" Then
                word = Left(word, x - 5) & "e"
            ElseIf Right(word, 5) = "ation" Then
                word = Left(word, x - 3) & "e"
            ElseIf word = "passion" Then
                ' skip
            ElseIf word = "religion" Then
                'skip
            Else
                word = Left(word, x - 3)
            End If
        ElseIf Right(word, 2) = "ty" Then
            If Right(word, 3) = "ity" Then
                'skip
            ElseIf word = "sanctity" Then
                'skip
            Else
                word = Left(word, x - 2)
            End If
        Else
            ' do nothing
        End If
    End If
    
    GetNormalizedWord = word
End Function


Sub TestNormalize()
    Dim word As Variant, ThisWord As String, x As String, y As String, testWords() As String
    Debug.Print ""
    
    Dim wordsForTest6 As New Collection ' Collection to store words for test 6
        
    
    If (True) Then
        Debug.Print "--"
        
        x = "Crucifixions crucifixion depictions"
        y = getVisibleCharacters(x)
        testWords = Split(y, " ")
        
        For a = 1 To 5
            Debug.Print "---"
            For i = 0 To UBound(testWords)
                ThisWord = testWords(i)
                If Len(ThisWord) > 3 Then
                    If Right(ThisWord, 2) = "ed" Then
                        If a = 1 Then
                            w = GetNormalizedWord(ThisWord)
                            Debug.Print "1   " & ThisWord & " =    " & w
                        End If
                    ElseIf Right(ThisWord, 3) = "ing" Then
                        If a = 2 Then
                            w = GetNormalizedWord(ThisWord)
                            Debug.Print "2   " & ThisWord & " =    " & w
                        End If
                    ElseIf Right(ThisWord, 2) = "es" Then
                        If a = 3 Then
                            w = GetNormalizedWord(ThisWord)
                            Debug.Print "3   " & ThisWord & " =    " & w
                        End If
                    ElseIf Right(ThisWord, 3) = "ion" Then
                        If a = 4 Then
                            w = GetNormalizedWord(ThisWord)
                            Debug.Print "4   " & ThisWord & " =    " & w
                        End If
                    ElseIf Right(ThisWord, 4) = "ions" Then
                        If a = 5 Then
                            w = GetNormalizedWord(Left(ThisWord, Len(ThisWord) - 1))
                            Debug.Print "5   " & ThisWord & " =    " & w
                        End If
                    ElseIf ThisWord <> "" Then ' Add more conditions for other tests
                        wordsForTest6.Add ThisWord ' Add the word to the collection for test 6
                    End If
                End If
            Next i
        Next a
        
        Debug.Print "--"
    
        ' Process the words collected for test 6
        Debug.Print "---6"
        For Each word In wordsForTest6
            w = GetNormalizedWord(word)
            Debug.Print word & " =    " & w
        Next word
    End If
    
End Sub

Note, this normalizer will ignore 3, 2 or 1 letter words, as well as use some hard-coded special cases

square