Create word list with frequency and location of occurrence in Word 2013

Anonymous
2015-12-21T20:38:45+00:00

I need to create an index of every unique word used in a transcription, along with the number of times that word appears and the location it appears (page#:line#). Is this possible? I am aiming for something that looks like the image attached below. Thank you

Microsoft 365 and Office | Word | For home | Windows

Locked Question. This question was migrated from the Microsoft Support Community. You can vote on whether it's helpful, but you can't add comments or replies or follow the question.

0 comments No comments
{count} votes

1 answer

Sort by: Most helpful
  1. Andreas Killer 144K Reputation points Volunteer Moderator
    2015-12-23T13:16:39+00:00

    Option Explicit

    Sub CreateWordListWithFrequencyAndLocationOfOccurrence()

      Dim Dict As Object 'Scripting.Dictionary

      Dim Ignore As Object 'Scripting.Dictionary

      Dim This As Range, Key As String

      Dim Keys, Items

      Dim Info, Item, Temp

      Dim i As Long, j As Long

      Dim C As Collection

      'Create a dictionary with words to be ignored

      Set Ignore = CreateObject("Scripting.Dictionary")

      Ignore.CompareMode = vbTextCompare

      Ignore.Add "", 0

      Ignore.Add "a", 0

      Ignore.Add "an", 0

      Ignore.Add "and", 0

      Ignore.Add "as", 0

      Ignore.Add "be", 0

      Ignore.Add "i", 0

      Ignore.Add "in", 0

      Ignore.Add "of", 0

      Ignore.Add "or", 0

      Ignore.Add "so", 0

      Ignore.Add "the", 0

      Ignore.Add "to", 0

      'Add as many words as you like

      'Create a dictionary for the words of the document

      Set Dict = CreateObject("Scripting.Dictionary")

      Dict.CompareMode = vbTextCompare

      'For each word

      For Each This In ActiveDocument.Words

        'Get the pure word

        Key = Trim$(TrimWhite(This.Text))

        'Ignore?

        If Ignore.Exists(Key) Then GoTo Skip

        'Only words that starts with A..Z

        i = Asc(UCase(Left$(Key, 1)))

        If i < 65 Then GoTo Skip

        If i > 90 Then GoTo Skip

        'Get the "page:line" information

        Info = This.Information(wdActiveEndPageNumber) & _

          ":" & This.Information(wdFirstCharacterLineNumber)

        'Already found?

        If Not Dict.Exists(Key) Then

          'No add a new entry

          Dict.Add Key, Array(0, New Collection)

        End If

        'Read the entry from the dictionary

        Item = Dict.Item(Key)

        'Increment the word counter

        Item(0) = Item(0) + 1

        'Add the information

        Item(1).Add Info

        'Store it back

        Dict.Item(Key) = Item

    Skip:

      Next

      'Create a new document

      Documents.Add ActiveDocument.AttachedTemplate.FullName, False

      Application.ScreenUpdating = False

      'Get all items from the dictionary

      Keys = Dict.Keys

      Items = Dict.Items

      'Sort by words ascending

      QuickSort_DataPrim Keys, Items, msoSortOrderAscending

      'For each word

      For i = 0 To UBound(Items)

        'Get the position

        j = Selection.Start

        'Write 'Word (Occurences)'

        Selection.TypeText Keys(i) & " (" & Items(i)(0) & ")"

        'Make it bold

        ActiveDocument.Range(j, Selection.Start).Font.Bold = True

        Selection.TypeText vbVerticalTab

        'Get all informations and convert into an array

        Set C = Items(i)(1)

        ReDim Temp(1 To C.Count)

        j = 0

        For Each Info In C

          j = j + 1

          Temp(j) = Info

        Next

        'Write the informations

        Selection.TypeText Join(Temp, ", ")

        Selection.TypeText vbVerticalTab

      Next

      'Format to columns

      With ActiveDocument.PageSetup.TextColumns

        .SetCount NumColumns:=5

        .EvenlySpaced = True

        .LineBetween = True

      End With

    End Sub

    Private Function TrimWhite(ByVal S As String) As String

      'Return a string with white space removed

      Dim i As Long

      For i = 1 To Len(S)

        If Asc(Mid$(S, i, 1)) < 32 Then Mid$(S, i, 1) = vbNullChar

      Next

      TrimWhite = Replace$(S, vbNullChar, "")

    End Function

    Private Sub QuickSort_DataPrim(ByRef Arr, ByRef Data, ByVal SortOrder As MsoSortOrder)

      Const QTHRESH As Long = 20

      Dim i As Long, j As Long

      Dim Start As Long, Ende As Long

      Dim Pivot, Temp

      Dim Stack(1 To 64) As Long

      Dim StackPtr As Long

      Start = LBound(Arr)

      Ende = UBound(Arr)

      Stack(StackPtr + 1) = Start

      Stack(StackPtr + 2) = Ende

      StackPtr = StackPtr + 2

      Do

        StackPtr = StackPtr - 2

        Start = Stack(StackPtr + 1)

        Ende = Stack(StackPtr + 2)

        If Ende - Start < QTHRESH Then

          'Insertionsort

          For j = Start + 1 To Ende

            Pivot = Arr(j)

            Temp = Data(j)

            For i = j - 1 To Start Step -1

              If SortOrder = msoSortOrderAscending Then

                If Arr(i) <= Pivot Then Exit For

              Else

                If Arr(i) >= Pivot Then Exit For

              End If

              Arr(i + 1) = Arr(i)

              Data(i + 1) = Data(i)

            Next

            Arr(i + 1) = Pivot

            Data(i + 1) = Temp

          Next

        Else

          'QuickSort

          i = Start: j = Ende

          Pivot = Arr((Start + Ende) \ 2)

          Do

            If SortOrder = msoSortOrderAscending Then

              Do While (Arr(i) < Pivot): i = i + 1: Loop

              Do While (Arr(j) > Pivot): j = j - 1: Loop

            Else

              Do While (Arr(i) > Pivot): i = i + 1: Loop

              Do While (Arr(j) < Pivot): j = j - 1: Loop

            End If

            If i <= j Then

              If i < j Then

                Temp = Arr(i)

                Arr(i) = Arr(j)

                Arr(j) = Temp

                Temp = Data(i)

                Data(i) = Data(j)

                Data(j) = Temp

              End If

              i = i + 1: j = j - 1

            End If

          Loop Until i > j

          If (Start < j) Then

            Stack(StackPtr + 1) = Start

            Stack(StackPtr + 2) = j

            StackPtr = StackPtr + 2

          End If

          If (i < Ende) Then

            Stack(StackPtr + 1) = i

            Stack(StackPtr + 2) = Ende

            StackPtr = StackPtr + 2

          End If

        End If

      Loop Until StackPtr = 0

    End Sub

    6 people found this answer helpful.
    0 comments No comments