My end goal here is to convert a PDF to an Excel sheet by reading the PDF's table information and writing it to the worksheet. I know Adobe Acrobat Pro has a function that will convert a PDF to an Excel sheet, but the final formatting is not what I'm looking for. Adobe is recognizing the PDFs as scanned images, but I need to read the actual text within the tables. The code below will read and import table data from a PDF table that I created in Word and saved as a PDF. Does anyone know how to run text recognition (OCR) on a scanned PDF using VBA so that its text becomes readable?
Dim aApp As Acrobat.AcroApp
Dim av_doc As CAcroAVDoc
Dim pdf_doc As CAcroPDDoc
Dim sel_text As CAcroPDTextSelect
Dim i As Long, j As Long
Dim PageNumber, PageContent, Content
Dim data_print As Boolean
Dim cnt As Long
Dim curRow As Long
curRow = 1
'Set wksSource = ActiveWorkbook.ActiveSheet
Set aApp = CreateObject("AcroExch.App")
Set av_doc = CreateObject("AcroExch.AVDoc")
If av_doc.Open(pdf_file, vbNull) <> True Then Exit Sub
While av_doc Is Nothing
Set av\_doc = aApp.GetActiveDoc
Wend
Set pdf_doc = av_doc.GetPDDoc
'Set Rng = Worksheets("Sheet1").Range("A1")
For i = 0 To pdf_doc.GetNumPages - 1
Set PageNumber = pdf\_doc.AcquirePage(i)
Set PageContent = CreateObject("AcroExch.HiliteList")
On Error Resume Next
If PageContent.Add(0, 9000) <> True Then Exit Sub
Set sel\_text = PageNumber.CreatePageHilite(PageContent)
On Error GoTo 0
For j = 0 To sel\_text.GetNumText - 1
Content = sel\_text.GetText(j)
If Content Like "\*PART NO\*" Then
data\_print = True
ElseIf Content Like "\*GEAE\*" Then
data\_print = False
Exit For
End If
If data\_print = True Then
cnt = cnt + 1
Cells(curRow, cnt) = Application.WorksheetFunction.Clean(Trim(Content))
End If
If cnt = 7 Then
cnt = 0
curRow = curRow + 1
End If
'Range("A" & Rows.Count).End(xlUp).Offset(1, 0).Value = sel\_text.GetText(j)
Next j
Next i
av_doc.Close False
aApp.Exit
Set sel_text = Nothing
Set PageNumber = Nothing
End Sub