Option Explicit
Sub CreateWordListWithFrequencyAndLocationOfOccurrence()
Dim Dict As Object 'Scripting.Dictionary
Dim Ignore As Object 'Scripting.Dictionary
Dim This As Range, Key As String
Dim Keys, Items
Dim Info, Item, Temp
Dim i As Long, j As Long
Dim C As Collection
'Create a dictionary with words to be ignored
Set Ignore = CreateObject("Scripting.Dictionary")
Ignore.CompareMode = vbTextCompare
Ignore.Add "", 0
Ignore.Add "a", 0
Ignore.Add "an", 0
Ignore.Add "and", 0
Ignore.Add "as", 0
Ignore.Add "be", 0
Ignore.Add "i", 0
Ignore.Add "in", 0
Ignore.Add "of", 0
Ignore.Add "or", 0
Ignore.Add "so", 0
Ignore.Add "the", 0
Ignore.Add "to", 0
'Add as many words as you like
'Create a dictionary for the words of the document
Set Dict = CreateObject("Scripting.Dictionary")
Dict.CompareMode = vbTextCompare
'For each word
For Each This In ActiveDocument.Words
'Get the pure word
Key = Trim$(TrimWhite(This.Text))
'Ignore?
If Ignore.Exists(Key) Then GoTo Skip
'Only words that starts with A..Z
i = Asc(UCase(Left$(Key, 1)))
If i < 65 Then GoTo Skip
If i > 90 Then GoTo Skip
'Get the "page:line" information
Info = This.Information(wdActiveEndPageNumber) & _
":" & This.Information(wdFirstCharacterLineNumber)
'Already found?
If Not Dict.Exists(Key) Then
'No add a new entry
Dict.Add Key, Array(0, New Collection)
End If
'Read the entry from the dictionary
Item = Dict.Item(Key)
'Increment the word counter
Item(0) = Item(0) + 1
'Add the information
Item(1).Add Info
'Store it back
Dict.Item(Key) = Item
Skip:
Next
'Create a new document
Documents.Add ActiveDocument.AttachedTemplate.FullName, False
Application.ScreenUpdating = False
'Get all items from the dictionary
Keys = Dict.Keys
Items = Dict.Items
'Sort by words ascending
QuickSort_DataPrim Keys, Items, msoSortOrderAscending
'For each word
For i = 0 To UBound(Items)
'Get the position
j = Selection.Start
'Write 'Word (Occurences)'
Selection.TypeText Keys(i) & " (" & Items(i)(0) & ")"
'Make it bold
ActiveDocument.Range(j, Selection.Start).Font.Bold = True
Selection.TypeText vbVerticalTab
'Get all informations and convert into an array
Set C = Items(i)(1)
ReDim Temp(1 To C.Count)
j = 0
For Each Info In C
j = j + 1
Temp(j) = Info
Next
'Write the informations
Selection.TypeText Join(Temp, ", ")
Selection.TypeText vbVerticalTab
Next
'Format to columns
With ActiveDocument.PageSetup.TextColumns
.SetCount NumColumns:=5
.EvenlySpaced = True
.LineBetween = True
End With
End Sub
Private Function TrimWhite(ByVal S As String) As String
'Return a string with white space removed
Dim i As Long
For i = 1 To Len(S)
If Asc(Mid$(S, i, 1)) < 32 Then Mid$(S, i, 1) = vbNullChar
Next
TrimWhite = Replace$(S, vbNullChar, "")
End Function
Private Sub QuickSort_DataPrim(ByRef Arr, ByRef Data, ByVal SortOrder As MsoSortOrder)
Const QTHRESH As Long = 20
Dim i As Long, j As Long
Dim Start As Long, Ende As Long
Dim Pivot, Temp
Dim Stack(1 To 64) As Long
Dim StackPtr As Long
Start = LBound(Arr)
Ende = UBound(Arr)
Stack(StackPtr + 1) = Start
Stack(StackPtr + 2) = Ende
StackPtr = StackPtr + 2
Do
StackPtr = StackPtr - 2
Start = Stack(StackPtr + 1)
Ende = Stack(StackPtr + 2)
If Ende - Start < QTHRESH Then
'Insertionsort
For j = Start + 1 To Ende
Pivot = Arr(j)
Temp = Data(j)
For i = j - 1 To Start Step -1
If SortOrder = msoSortOrderAscending Then
If Arr(i) <= Pivot Then Exit For
Else
If Arr(i) >= Pivot Then Exit For
End If
Arr(i + 1) = Arr(i)
Data(i + 1) = Data(i)
Next
Arr(i + 1) = Pivot
Data(i + 1) = Temp
Next
Else
'QuickSort
i = Start: j = Ende
Pivot = Arr((Start + Ende) \ 2)
Do
If SortOrder = msoSortOrderAscending Then
Do While (Arr(i) < Pivot): i = i + 1: Loop
Do While (Arr(j) > Pivot): j = j - 1: Loop
Else
Do While (Arr(i) > Pivot): i = i + 1: Loop
Do While (Arr(j) < Pivot): j = j - 1: Loop
End If
If i <= j Then
If i < j Then
Temp = Arr(i)
Arr(i) = Arr(j)
Arr(j) = Temp
Temp = Data(i)
Data(i) = Data(j)
Data(j) = Temp
End If
i = i + 1: j = j - 1
End If
Loop Until i > j
If (Start < j) Then
Stack(StackPtr + 1) = Start
Stack(StackPtr + 2) = j
StackPtr = StackPtr + 2
End If
If (i < Ende) Then
Stack(StackPtr + 1) = i
Stack(StackPtr + 2) = Ende
StackPtr = StackPtr + 2
End If
End If
Loop Until StackPtr = 0
End Sub