asked on
Sub ExtractPDFs()
'Note: Requires reference to Acrobat object library, via Tools|References
Application.ScreenUpdating = False
Dim strFolder As String, strFile As String, xlBook As Workbook, xlSheet As Worksheet
Dim AcroApp As CAcroApp, AcroAVDoc As CAcroAVDoc, AcroPDDoc As CAcroPDDoc
Dim AcroTextSelect As CAcroPDTextSelect, PageNumber As Object, PageContent As Object
Dim i As Long, j As Long, k As Long, StrContent As String
strFolder = GetFolder
If strFolder = "" Then Exit Sub
Set xlBook = ActiveWorkbook 'Workbooks.Open(Filename:="", AddToMRU:=False)
Set AcroAVDoc = CreateObject("AcroExch.AVDoc")
Set AcroApp = CreateObject("AcroExch.App")
strFile = Dir(strFolder & "\*.pdf", vbNormal)
While strFile <> ""
If AcroAVDoc.Open(strFolder & "\" & strFile, vbNull) = True Then
Set xlSheet = xlBook.Sheets.Add
xlSheet.Name = Split(strFile, ".pdf")(0)
Application.ScreenUpdating = True
Application.ScreenUpdating = False
While AcroAVDoc Is Nothing
Set AcroAVDoc = AcroApp.GetActiveDoc
Wend
StrContent = ""
Set AcroPDDoc = AcroAVDoc.GetPDDoc
For i = 0 To AcroPDDoc.GetNumPages - 1
Set PageNumber = AcroPDDoc.AcquirePage(i)
Set PageContent = CreateObject("AcroExch.HiliteList")
If PageContent.Add(0, 9000) = True Then
Set AcroTextSelect = PageNumber.CreatePageHilite(PageContent)
' The next line is needed to avoid errors with protected PDFs that can't be read
On Error Resume Next
For j = 0 To AcroTextSelect.GetNumText - 1
StrContent = StrContent & AcroTextSelect.GetText(j)
Next j
With xlSheet
j = .UsedRange.Rows.Count + 1
For k = 0 To UBound(Split(StrContent, vbCr))
.Range("A" & j + k).Value = Split(StrContent, vbCr)(k)
Next
.Range("A1").Value = strFile
.UsedRange.WrapText = False
End With
End If
Next i
AcroAVDoc.Close True
End If
strFile = Dir()
Wend
Set xlSheet = Nothing: Set xlBook = Nothing
Set PageContent = Nothing: Set PageNumber = Nothing
Set AcroTextSelect = Nothing: Set AcroAVDoc = Nothing: Set AcroApp = Nothing
Application.ScreenUpdating = True
End Sub
Function GetFolder() As String
Dim oFolder As Object
GetFolder = ""
Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
Set oFolder = Nothing
End Function