Hans,
Yes that will do nicely. Thank you. This is a lot of code, but since you have helped so much I wanted to share it with you. The purpose is to process a batch folder of Word Documents and extract the content (text) of each content control and record it
in an Access database.
I start with the database: D:\Batch\Tally Data Forms\Extracted Data.mdb which has a single column labeled "Record Number."
Since their are nine types of CCs and since "For Each oCC in ActiveDocument.ContentControls" only processes the content controls in the main text story range of the document have setup a function to return applicable data on the types of content controls I
want to process. One criteria is that they must be titled.
There remains one issue that I have not fully addressed. That is the possibility of two or more content controls having the same title. That is an expected case where mapped content conrols are used and in that case each would contain the same text. Otherwise is
is just poor document design IMHO. As it is, value of the last duplicate titled CC will determine the value stored in the database.
This is something that I plan to publish on my website. I would like to give you credit for your assistance. Credit by real name if you don't mind and you are willing to provide. Thanks again.
Option Explicit
Dim oDoc As Word.Document
Dim oCC As ContentControl
Sub ExtractDataFromDocumentCCs()
'Stores data in Access database. Requires reference to MS ActiveX Data Objects 2.8 Library
Dim pPath As String
Dim pFileName As String
Dim arrFiles() As String
Dim i As Long, j As Long
Dim vConnection As New ADODB.Connection
Dim vRecordSet As New ADODB.Recordset
Dim FiletoKill As String
Dim pCmdLind As String
Dim arrCCData() As String
Dim bProcessCCs As Boolean
'Dim Column As Variant
'Dim arrCols() As String
pPath = GetPathToUse
If pPath = "" Then
MsgBox "A folder was not selected"
Exit Sub
End If
CreateProcessedDirectory pPath
'Identify Word Document files in folder to process.
pFileName = Dir$(pPath & "*.doc")
ReDim arrFiles(1 To 1000) 'A number larger than the expected number of Word files in folder to process
'Add file name to the array
Do While pFileName <> ""
i = i + 1
arrFiles(i) = pFileName
'Get the next file name
pFileName = Dir$
Loop
If i = 0 Then
MsgBox "The selected folder did not contain any forms to process."
Exit Sub
End If
'Resize and preserve the array
ReDim Preserve arrFiles(1 To i)
Application.ScreenUpdating = False
'Provide connection string for data using Jet Provider for Access database
vConnection.ConnectionString = "data source=D:\Batch\Tally Data Forms\Extracted Data.mdb;" & _
"Provider=Microsoft.Jet.OLEDB.4.0;"
vConnection.Open
If MsgBox("Do you want to clear stored data presently in the database?", vbQuestion + vbYesNo, "CLEAR DATABASE") = vbYes Then
'Clear database records by dropping and recreating table
vConnection.Execute "DROP TABLE MyTable"
vConnection.Execute "CREATE TABLE MyTable ([Record Number] Integer CONSTRAINT PrimaryKey PRIMARY KEY)"
End If
'Retrieve data from each document in batch folder
For i = 1 To UBound(arrFiles)
Set oDoc = Documents.Open(FileName:=pPath & arrFiles(i), Visible:=False)
'Identify the file to remove from batch folder after processing
FiletoKill = pPath & oDoc
'Collect data from content controls in document to process.
bProcessCCs = True
arrCCData = DocCCData
If arrCCData(0, 2) = "***DOCUMENT DOES NOT CONTAIN ANY VALID CONTENT CONTROLS***" Then bProcessCCs = False
If bProcessCCs Then
'Add/ensure a column exists for each content control title. Attempts to add duplicate column is handled by error handler.
On Error Resume Next
For j = 0 To UBound(arrCCData)
pCmdLind = "ALTER TABLE MyTable ADD COLUMN [" & arrCCData(j, 0) & "] Text;"
vConnection.Execute pCmdLind
Next j
On Error GoTo 0
End If
'Open the recordset.
vRecordSet.Open "MyTable", vConnection, adOpenKeyset, adLockOptimistic
'Add recored for each document processed
vRecordSet.AddNew
With oDoc
vRecordSet("Record Number") = vRecordSet.RecordCount + 1
If bProcessCCs Then
For j = 0 To UBound(arrCCData)
vRecordSet(arrCCData(j, 0)) = arrCCData(j, 1)
Next j
End If
'Save processed file in Processed folder
.SaveAs pPath & "Processed" & .Name
.Close
Kill FiletoKill 'Delete file from the batch folder
End With
vRecordSet.Update
vRecordSet.Close
Next i
vConnection.Close
Set vRecordSet = Nothing
Set vConnection = Nothing
Set oDoc = Nothing
Set oCC = Nothing
Application.ScreenUpdating = True
End Sub
Private Function GetPathToUse() As Variant
Dim fDialog As FileDialog
Set fDialog = Application.FileDialog(msoFileDialogFolderPicker)
With fDialog
.Title = "Select Folder containing the completed form documents to and click OK"
.AllowMultiSelect = False
.InitialView = msoFileDialogViewList
If .Show <> -1 Then
GetPathToUse = ""
Set fDialog = Nothing
Exit Function
End If
GetPathToUse = fDialog.SelectedItems.Item(1)
If Right(GetPathToUse, 1) <> "" Then GetPathToUse = GetPathToUse + ""
End With
End Function
Sub CreateProcessedDirectory(pPath As String)
'Requires reference to Microsoft Scripting Runtime
Dim Path As String
Dim fso As FileSystemObject
Path = pPath
Dim NewDir As String
Set fso = CreateObject("Scripting.FileSystemObject")
NewDir = Path & "Processed"
If Not fso.FolderExists(NewDir) Then
fso.CreateFolder NewDir
End If
End Sub
Function DocCCData() As String()
Dim arrX() As String
Dim lngValidator As Long
Dim rngStory As Word.Range
Dim oCount As Long
Dim oShp As Word.Shape
Dim x As Long
lngValidator = ActiveDocument.Sections(1).Headers(1).Range.StoryType
'Get count of titled CCs of valid type in document
For Each rngStory In ActiveDocument.StoryRanges
'Iterate through all linked stories
Select Case rngStory.StoryType
Case 1 To 11
Do
On Error Resume Next
For Each oCC In rngStory.ContentControls
Select Case oCC.Type
Case 7 'Do not include Group yype controls
Case Else
If StrPtr(oCC.Title) <> 0 Then
oCount = oCount + 1
End If
End Select
Next oCC
Select Case rngStory.StoryType
Case 6, 7, 8, 9, 10, 11
If rngStory.ShapeRange.Count > 0 Then
For Each oShp In rngStory.ShapeRange
If oShp.TextFrame.HasText Then
For Each oCC In oShp.TextFrame.TextRange.ContentControls
Select Case oCC.Type
Case 7 'Do not include Group yype controls
Case Else
If StrPtr(oCC.Title) <> 0 Then
oCount = oCount + 1
End If
End Select
Next oCC
End If
Next oShp
End If
Case Else
'Do Nothing
End Select
On Error GoTo 0
'Get next linked story (if any)
Set rngStory = rngStory.NextStoryRange
Loop Until rngStory Is Nothing
Case Else
End Select
Next
ReDim arrX(0, 2)
If oCount > 0 Then
x = 0
ReDim arrX(oCount - 1, 2)
For Each rngStory In ActiveDocument.StoryRanges
'Iterate through all linked stories
Select Case rngStory.StoryType
Case 1 To 11
Do
For Each oCC In rngStory.ContentControls
Select Case oCC.Type
Case 7 'Do not include Group type controls
Case Else
If StrPtr(oCC.Title) <> 0 Then
arrX(x, 0) = oCC.Title
arrX(x, 2) = oCC.ShowingPlaceholderText
Select Case oCC.Type
Case wdContentControlPicture
On Error Resume Next
arrX(x, 1) = oCC.Range.InlineShapes(1).LinkFormat.SourceFullName
If Err.Number <> 0 Then arrX(x, 1) = "Empty\Unlinked Image"
On Error GoTo 0
Case Else
If Not oCC.ShowingPlaceholderText Then
arrX(x, 1) = oCC.Range.Text
Else
arrX(x, 1) = ""
End If
End Select
x = x + 1
End If
End Select
Next oCC
Select Case rngStory.StoryType
Case 6, 7, 8, 9, 10, 11
If rngStory.ShapeRange.Count > 0 Then
For Each oShp In rngStory.ShapeRange
If oShp.TextFrame.HasText Then
For Each oCC In oShp.TextFrame.TextRange.ContentControls
Select Case oCC.Type
Case 7 'Do not include Group type controls
Case Else
If StrPtr(oCC.Title) <> 0 Then
arrX(x, 0) = oCC.Title
arrX(x, 2) = oCC.ShowingPlaceholderText
Select Case oCC.Type
Case wdContentControlPicture
On Error Resume Next
arrX(x, 1) = oCC.Range.InlineShapes(1).LinkFormat.SourceFullName
If Err.Number <> 0 Then arrX(x, 1) = "Empty\Unlinked Image"
On Error GoTo 0
Case Else
If Not oCC.ShowingPlaceholderText Then
arrX(x, 1) = oCC.Range.Text
Else
arrX(x, 1) = ""
End If
End Select
x = x + 1
End If
End Select
Next oCC
End If
Next oShp
End If
Case Else
'Do Nothing
End Select
On Error GoTo 0
'Get next linked story (if any)
Set rngStory = rngStory.NextStoryRange
Loop Until rngStory Is Nothing
Case Else
End Select
Next rngStory
Else
arrX(0, 2) = "***DOCUMENT DOES NOT CONTAIN ANY VALID CONTENT CONTROLS***"
End If
DocCCData = arrX
Set oCC = Nothing
End Function
If you or anyone else reviewing this thread has a suggestion for a better way to handle the case when the document does not contain any valid content controls then by all means suggest!!
Greg Maxey --- Visit my website at: http://gregmaxey.mvps.org/word\_tips.htm