Gvigorus
asked on
Please, help convert this VB 6 code to VB.NET
Please, please! Help me convert this code from VB 6 to VB.NET! The below code does not run under VB.NET because there are some differences in the way VB.NET works. Please, let me know what I need to change to get this code to work in .NET. Thank you for your help!
Sub Main()
    'Entry point. Expects the path of a PDF file as the program's command line
    '(for example a file dropped onto the executable), validates it, and hands
    'it to ExtractPDF. Every validation failure is reported with a message box.
    Dim strFile As String

    'Read the command line once instead of re-evaluating Command$() per check.
    strFile = Trim$(Command$())

    'The path may arrive wrapped in double quotes (presumably the shell quotes
    'dropped paths that contain spaces - confirm on the target Windows version).
    'Dir() needs the bare path, so strip one surrounding pair of quotes.
    If Len(strFile) >= 2 Then
        If Left$(strFile, 1) = """" And Right$(strFile, 1) = """" Then
            strFile = Mid$(strFile, 2, Len(strFile) - 2)
        End If
    End If

    If strFile = "" Then
        MsgBox "Please drop a PDF file onto this application.", vbOKOnly, "PDF Extract"
    ElseIf Dir(strFile) = "" Then
        MsgBox "The specified file does not exist.", vbOKOnly, "File Not Found"
    ElseIf UCase(Right(strFile, 4)) <> ".PDF" Then
        MsgBox "The specified file is not an Adobe Acrobat file.", vbOKOnly, "Invalid File Type"
    Else
        ExtractPDF strFile
    End If
End Sub
Private Sub ExtractPDF(strPath As String)
    'Extracts the text of the PDF file at strPath into a text file that has the
    'same directory and base name with a .txt extension, using the Acrobat OLE
    'automation objects (AcroExch.App / PDDoc / Rect).
    '
    '  strPath - path of the PDF; it is split into directory and file name with
    '            GetDirectoryName and GetFileName.
    '
    'A message box reports either completion or that Acrobat could not open
    'the document.

    'Acrobat objects.
    Dim gApp As Acrobat.CAcroApp
    Dim gPDDoc As Acrobat.CAcroPDDoc
    Dim pg As Acrobat.CAcroPDPage
    Dim rect As Object
    Dim txt As Acrobat.CAcroPDTextSelect
    Dim RectAry As Acrobat.CAcroRect
    'Counters.
    Dim lngCount As Long 'The text object counter.
    Dim lngPage As Long 'The page counter.
    'Text buffers.
    Dim strLine As String
    Dim strText As String
    'File variables.
    Dim strDirectory As String
    Dim strFileName As String
    Dim strOutputPath As String
    Dim lngDot As Long
    Dim intFile As Integer

    'Create the Acrobat objects (each one exactly once).
    Set gApp = CreateObject("AcroExch.App")
    Set gPDDoc = CreateObject("AcroExch.PDDoc")
    Set RectAry = CreateObject("AcroExch.Rect")

    'Split the path into directory (with trailing backslash) and file name.
    strDirectory = GetDirectoryName(strPath)
    strFileName = GetFileName(strPath)

    'Build the output path by replacing the extension, dot included, so that
    '"report.pdf" becomes "report.txt" rather than "report..txt".
    lngDot = InStrRev(strFileName, ".")
    If lngDot > 0 Then
        strOutputPath = strDirectory & Left$(strFileName, lngDot - 1) & ".txt"
    Else
        strOutputPath = strDirectory & strFileName & ".txt"
    End If

    'Attempt to open the document; only create the output file on success.
    If gPDDoc.Open(strDirectory & strFileName) Then
        'Ask VB for a free file number instead of assuming #1 is unused.
        intFile = FreeFile
        Open strOutputPath For Output As #intFile

        'Loop through all of the pages.
        For lngPage = 0 To gPDDoc.GetNumPages - 1
            'Get the current page and its size.
            Set pg = gPDDoc.AcquirePage(lngPage)
            Set rect = pg.GetSize

            'Selection rectangle: deliberately much larger than the page so
            'that everything on it is selected.
            RectAry.Left = 0
            RectAry.Right = rect.x * 10
            RectAry.bottom = 0
            RectAry.Top = rect.y * 10

            'Create the text selection.
            Set txt = gPDDoc.CreateTextSelect(lngPage, RectAry)
            strLine = ""

            'NOTE(review): guard in case no selection object is returned for a
            'page without text - confirm against the Acrobat IAC reference.
            If Not txt Is Nothing Then
                'Loop through all of the text objects on the page.
                For lngCount = 0 To txt.GetNumText - 1
                    DoEvents
                    strText = Trim(txt.GetText(lngCount))
                    'Append this text to the line, tab separated.
                    strLine = strLine & vbTab & strText
                    'A text object containing a line break ends the line.
                    If InStr(1, strText, vbNewLine) Then
                        Print #intFile, strLine
                        strLine = ""
                    End If
                Next lngCount
            End If

            'Write whatever is left so trailing text that did not end with a
            'line break is not lost when the buffer is reset for the next page.
            If strLine <> "" Then
                Print #intFile, strLine
            End If
        Next lngPage

        'Close the document and the output file, then quit Acrobat.
        gPDDoc.Close
        Close #intFile
        gApp.Exit

        'Let the user know we're done.
        MsgBox "The text has been extracted from " & strFileName & ".", vbOKOnly, "Extract Complete"
    Else
        'Do not claim success when the document could not be opened.
        gApp.Exit
        MsgBox "Acrobat could not open " & strFileName & ".", vbOKOnly, "Extract Failed"
    End If
End Sub
Function GetDirectoryName(ByVal strPath As String)
    'Returns the directory part of strPath: everything up to and including the
    'last backslash. A path that contains no backslash yields an empty string.
    Dim lngLastSlash As Long

    'Position of the final separator (0 when there is none).
    lngLastSlash = InStrRev(strPath, "\", -1, vbTextCompare)
    GetDirectoryName = Left$(strPath, lngLastSlash)
End Function
Function GetFileName(strPath As String)
    'Returns the file-name part of strPath: the characters that follow the
    'directory prefix computed by GetDirectoryName. A path with no backslash
    'is returned unchanged.
    GetFileName = Right(strPath, Len(strPath) - Len(GetDirectoryName(strPath)))
End Function
Sub Main()
    'Entry point. Expects the path of a PDF file as the program's command line
    '(for example a file dropped onto the executable), validates it, and hands
    'it to ExtractPDF. Every validation failure is reported with a message box.
    Dim strFile As String

    'Read the command line once instead of re-evaluating Command$() per check.
    strFile = Trim$(Command$())

    'The path may arrive wrapped in double quotes (presumably the shell quotes
    'dropped paths that contain spaces - confirm on the target Windows version).
    'Dir() needs the bare path, so strip one surrounding pair of quotes.
    If Len(strFile) >= 2 Then
        If Left$(strFile, 1) = """" And Right$(strFile, 1) = """" Then
            strFile = Mid$(strFile, 2, Len(strFile) - 2)
        End If
    End If

    If strFile = "" Then
        MsgBox "Please drop a PDF file onto this application.", vbOKOnly, "PDF Extract"
    ElseIf Dir(strFile) = "" Then
        MsgBox "The specified file does not exist.", vbOKOnly, "File Not Found"
    ElseIf UCase(Right(strFile, 4)) <> ".PDF" Then
        MsgBox "The specified file is not an Adobe Acrobat file.", vbOKOnly, "Invalid File Type"
    Else
        ExtractPDF strFile
    End If
End Sub
Private Sub ExtractPDF(strPath As String)
    'Extracts the text of the PDF file at strPath into a text file that has the
    'same directory and base name with a .txt extension, using the Acrobat OLE
    'automation objects (AcroExch.App / PDDoc / Rect).
    '
    '  strPath - path of the PDF; it is split into directory and file name with
    '            GetDirectoryName and GetFileName.
    '
    'A message box reports either completion or that Acrobat could not open
    'the document.

    'Acrobat objects.
    Dim gApp As Acrobat.CAcroApp
    Dim gPDDoc As Acrobat.CAcroPDDoc
    Dim pg As Acrobat.CAcroPDPage
    Dim rect As Object
    Dim txt As Acrobat.CAcroPDTextSelect
    Dim RectAry As Acrobat.CAcroRect
    'Counters.
    Dim lngCount As Long 'The text object counter.
    Dim lngPage As Long 'The page counter.
    'Text buffers.
    Dim strLine As String
    Dim strText As String
    'File variables.
    Dim strDirectory As String
    Dim strFileName As String
    Dim strOutputPath As String
    Dim lngDot As Long
    Dim intFile As Integer

    'Create the Acrobat objects (each one exactly once).
    Set gApp = CreateObject("AcroExch.App")
    Set gPDDoc = CreateObject("AcroExch.PDDoc")
    Set RectAry = CreateObject("AcroExch.Rect")

    'Split the path into directory (with trailing backslash) and file name.
    strDirectory = GetDirectoryName(strPath)
    strFileName = GetFileName(strPath)

    'Build the output path by replacing the extension, dot included, so that
    '"report.pdf" becomes "report.txt" rather than "report..txt".
    lngDot = InStrRev(strFileName, ".")
    If lngDot > 0 Then
        strOutputPath = strDirectory & Left$(strFileName, lngDot - 1) & ".txt"
    Else
        strOutputPath = strDirectory & strFileName & ".txt"
    End If

    'Attempt to open the document; only create the output file on success.
    If gPDDoc.Open(strDirectory & strFileName) Then
        'Ask VB for a free file number instead of assuming #1 is unused.
        intFile = FreeFile
        Open strOutputPath For Output As #intFile

        'Loop through all of the pages.
        For lngPage = 0 To gPDDoc.GetNumPages - 1
            'Get the current page and its size.
            Set pg = gPDDoc.AcquirePage(lngPage)
            Set rect = pg.GetSize

            'Selection rectangle: deliberately much larger than the page so
            'that everything on it is selected.
            RectAry.Left = 0
            RectAry.Right = rect.x * 10
            RectAry.bottom = 0
            RectAry.Top = rect.y * 10

            'Create the text selection.
            Set txt = gPDDoc.CreateTextSelect(lngPage, RectAry)
            strLine = ""

            'NOTE(review): guard in case no selection object is returned for a
            'page without text - confirm against the Acrobat IAC reference.
            If Not txt Is Nothing Then
                'Loop through all of the text objects on the page.
                For lngCount = 0 To txt.GetNumText - 1
                    DoEvents
                    strText = Trim(txt.GetText(lngCount))
                    'Append this text to the line, tab separated.
                    strLine = strLine & vbTab & strText
                    'A text object containing a line break ends the line.
                    If InStr(1, strText, vbNewLine) Then
                        Print #intFile, strLine
                        strLine = ""
                    End If
                Next lngCount
            End If

            'Write whatever is left so trailing text that did not end with a
            'line break is not lost when the buffer is reset for the next page.
            If strLine <> "" Then
                Print #intFile, strLine
            End If
        Next lngPage

        'Close the document and the output file, then quit Acrobat.
        gPDDoc.Close
        Close #intFile
        gApp.Exit

        'Let the user know we're done.
        MsgBox "The text has been extracted from " & strFileName & ".", vbOKOnly, "Extract Complete"
    Else
        'Do not claim success when the document could not be opened.
        gApp.Exit
        MsgBox "Acrobat could not open " & strFileName & ".", vbOKOnly, "Extract Failed"
    End If
End Sub
Function GetDirectoryName(ByVal strPath As String)
    'Returns the directory part of strPath: everything up to and including the
    'last backslash. A path that contains no backslash yields an empty string.
    Dim lngLastSlash As Long

    'Position of the final separator (0 when there is none).
    lngLastSlash = InStrRev(strPath, "\", -1, vbTextCompare)
    GetDirectoryName = Left$(strPath, lngLastSlash)
End Function
Function GetFileName(strPath As String)
    'Returns the file-name part of strPath: the characters that follow the
    'directory prefix computed by GetDirectoryName. A path with no backslash
    'is returned unchanged.
    GetFileName = Right(strPath, Len(strPath) - Len(GetDirectoryName(strPath)))
End Function
Looks like pretty simple code...did you try using the upgrade wizard?
There is a VB 6 Import wizard you can use to convert vb6 code to .NET. It ain't perfect, but it will get you close. A lot of it depends on the design of your VB6 app. Some import much easier than others.
Hi Gvigorus;
This should work if the name of the PDF file is given on the command line. If this is a Drag and Drop on to a form let me know. Either way you can test the code to see if it does what the old code did.
Option Strict Off
Imports System.IO
Module Module1

    ''' <summary>
    ''' Command-line arguments as returned by Environment.GetCommandLineArgs():
    ''' element 0 is the program itself, element 1 (when present) the PDF path.
    ''' </summary>
    Private args() As String

    ''' <summary>
    ''' Entry point. Validates the PDF path given as the first command-line
    ''' argument and passes it to ExtractPDF; each validation failure is
    ''' reported with a message box.
    ''' </summary>
    Sub Main()
        args = Environment.GetCommandLineArgs()

        If args.Length < 2 Then
            MsgBox("Please drop a PDF file onto this application.", MsgBoxStyle.OkOnly, "PDF Extract")
        ElseIf Not File.Exists(args(1)) Then
            MsgBox("The specified file does not exist.", MsgBoxStyle.OkOnly, "File Not Found")
        ElseIf Path.GetExtension(args(1)).ToUpper() <> ".PDF" Then
            MsgBox("The specified file is not an Adobe Acrobat file.", MsgBoxStyle.OkOnly, "Invalid File Type")
        Else
            ExtractPDF(args(1))
        End If
    End Sub

    ''' <summary>
    ''' Extracts the text of a PDF into a .txt file with the same directory and
    ''' base name, using the Acrobat COM automation objects. A message box
    ''' reports either completion or that Acrobat could not open the document.
    ''' </summary>
    ''' <param name="strPath">Path of the PDF file to read.</param>
    Private Sub ExtractPDF(ByRef strPath As String)
        'Work from the parameter (not the module-level args) and resolve it to a
        'full path, so a bare file name no longer produces "\name.txt".
        Dim fullPath As String = Path.GetFullPath(strPath)
        Dim strFileName As String = Path.GetFileName(fullPath)
        Dim strOutputPath As String = Path.ChangeExtension(fullPath, ".txt")

        'Create the Acrobat objects (each one exactly once).
        Dim gApp As Acrobat.CAcroApp = CType(CreateObject("AcroExch.App"), Global.Acrobat.CAcroApp)
        Dim gPDDoc As Acrobat.CAcroPDDoc = CType(CreateObject("AcroExch.PDDoc"), Global.Acrobat.CAcroPDDoc)
        Dim RectAry As Acrobat.CAcroRect = CType(CreateObject("AcroExch.Rect"), Global.Acrobat.CAcroRect)

        Try
            'Attempt to open the document; do not create output or claim
            'success when Acrobat cannot open it.
            If Not gPDDoc.Open(fullPath) Then
                MsgBox("Acrobat could not open " & strFileName & ".", MsgBoxStyle.OkOnly, "Extract Failed")
                Return
            End If

            Try
                'Using guarantees the output file is flushed and closed even
                'if an Acrobat call throws part-way through.
                Using outFile As New StreamWriter(strOutputPath)
                    'Loop through all of the pages.
                    For lngPage As Integer = 0 To gPDDoc.GetNumPages - 1
                        'Get the current page and its size (late bound).
                        Dim pg As Acrobat.CAcroPDPage = CType(gPDDoc.AcquirePage(lngPage), Global.Acrobat.CAcroPDPage)
                        Dim rect As Object = pg.GetSize

                        'Selection rectangle: deliberately much larger than
                        'the page so that everything on it is selected.
                        RectAry.Left = 0
                        RectAry.right = rect.x * 10
                        RectAry.bottom = 0
                        RectAry.Top = rect.y * 10

                        'Create the text selection.
                        Dim txt As Acrobat.CAcroPDTextSelect = CType(gPDDoc.CreateTextSelect(lngPage, RectAry), Global.Acrobat.CAcroPDTextSelect)
                        Dim strLine As String = ""

                        'NOTE(review): guard in case no selection object is
                        'returned for a page without text - confirm against
                        'the Acrobat IAC reference.
                        If txt IsNot Nothing Then
                            'Loop through all of the text objects on the page.
                            'Application.DoEvents() is intentionally not called:
                            'there is no form to keep responsive, and it needs a
                            'System.Windows.Forms reference that a console
                            'project does not have.
                            For lngCount As Integer = 0 To txt.GetNumText - 1
                                Dim strText As String = Trim(txt.GetText(lngCount))
                                'Append this text to the line, tab separated.
                                strLine &= vbTab & strText
                                'A text object containing a line break ends the line.
                                If InStr(1, strText, vbNewLine) > 0 Then
                                    outFile.WriteLine(strLine)
                                    strLine = ""
                                End If
                            Next
                        End If

                        'Write whatever is left so trailing text without a line
                        'break is not lost when moving to the next page.
                        If strLine <> "" Then
                            outFile.WriteLine(strLine)
                        End If
                    Next lngPage
                End Using
            Finally
                'Close the document.
                gPDDoc.Close()
            End Try
        Finally
            'Quit Acrobat whether or not extraction succeeded.
            gApp.Exit()
        End Try

        'Let the user know we're done.
        MsgBox("The text has been extracted from " & strFileName & ".", MsgBoxStyle.OkOnly, "Extract Complete")
    End Sub

End Module
Fernando
This should work if the name of the PDF file is given on the command line. If this is a Drag and Drop on to a form let me know. Either way you can test the code to see if it does what the old code did.
Option Strict Off
Imports System.IO
Module Module1

    ''' <summary>
    ''' Command-line arguments as returned by Environment.GetCommandLineArgs():
    ''' element 0 is the program itself, element 1 (when present) the PDF path.
    ''' </summary>
    Private args() As String

    ''' <summary>
    ''' Entry point. Validates the PDF path given as the first command-line
    ''' argument and passes it to ExtractPDF; each validation failure is
    ''' reported with a message box.
    ''' </summary>
    Sub Main()
        args = Environment.GetCommandLineArgs()

        If args.Length < 2 Then
            MsgBox("Please drop a PDF file onto this application.", MsgBoxStyle.OkOnly, "PDF Extract")
        ElseIf Not File.Exists(args(1)) Then
            MsgBox("The specified file does not exist.", MsgBoxStyle.OkOnly, "File Not Found")
        ElseIf Path.GetExtension(args(1)).ToUpper() <> ".PDF" Then
            MsgBox("The specified file is not an Adobe Acrobat file.", MsgBoxStyle.OkOnly, "Invalid File Type")
        Else
            ExtractPDF(args(1))
        End If
    End Sub

    ''' <summary>
    ''' Extracts the text of a PDF into a .txt file with the same directory and
    ''' base name, using the Acrobat COM automation objects. A message box
    ''' reports either completion or that Acrobat could not open the document.
    ''' </summary>
    ''' <param name="strPath">Path of the PDF file to read.</param>
    Private Sub ExtractPDF(ByRef strPath As String)
        'Work from the parameter (not the module-level args) and resolve it to a
        'full path, so a bare file name no longer produces "\name.txt".
        Dim fullPath As String = Path.GetFullPath(strPath)
        Dim strFileName As String = Path.GetFileName(fullPath)
        Dim strOutputPath As String = Path.ChangeExtension(fullPath, ".txt")

        'Create the Acrobat objects (each one exactly once).
        Dim gApp As Acrobat.CAcroApp = CType(CreateObject("AcroExch.App"), Global.Acrobat.CAcroApp)
        Dim gPDDoc As Acrobat.CAcroPDDoc = CType(CreateObject("AcroExch.PDDoc"), Global.Acrobat.CAcroPDDoc)
        Dim RectAry As Acrobat.CAcroRect = CType(CreateObject("AcroExch.Rect"), Global.Acrobat.CAcroRect)

        Try
            'Attempt to open the document; do not create output or claim
            'success when Acrobat cannot open it.
            If Not gPDDoc.Open(fullPath) Then
                MsgBox("Acrobat could not open " & strFileName & ".", MsgBoxStyle.OkOnly, "Extract Failed")
                Return
            End If

            Try
                'Using guarantees the output file is flushed and closed even
                'if an Acrobat call throws part-way through.
                Using outFile As New StreamWriter(strOutputPath)
                    'Loop through all of the pages.
                    For lngPage As Integer = 0 To gPDDoc.GetNumPages - 1
                        'Get the current page and its size (late bound).
                        Dim pg As Acrobat.CAcroPDPage = CType(gPDDoc.AcquirePage(lngPage), Global.Acrobat.CAcroPDPage)
                        Dim rect As Object = pg.GetSize

                        'Selection rectangle: deliberately much larger than
                        'the page so that everything on it is selected.
                        RectAry.Left = 0
                        RectAry.right = rect.x * 10
                        RectAry.bottom = 0
                        RectAry.Top = rect.y * 10

                        'Create the text selection.
                        Dim txt As Acrobat.CAcroPDTextSelect = CType(gPDDoc.CreateTextSelect(lngPage, RectAry), Global.Acrobat.CAcroPDTextSelect)
                        Dim strLine As String = ""

                        'NOTE(review): guard in case no selection object is
                        'returned for a page without text - confirm against
                        'the Acrobat IAC reference.
                        If txt IsNot Nothing Then
                            'Loop through all of the text objects on the page.
                            'Application.DoEvents() is intentionally not called:
                            'there is no form to keep responsive, and it needs a
                            'System.Windows.Forms reference that a console
                            'project does not have.
                            For lngCount As Integer = 0 To txt.GetNumText - 1
                                Dim strText As String = Trim(txt.GetText(lngCount))
                                'Append this text to the line, tab separated.
                                strLine &= vbTab & strText
                                'A text object containing a line break ends the line.
                                If InStr(1, strText, vbNewLine) > 0 Then
                                    outFile.WriteLine(strLine)
                                    strLine = ""
                                End If
                            Next
                        End If

                        'Write whatever is left so trailing text without a line
                        'break is not lost when moving to the next page.
                        If strLine <> "" Then
                            outFile.WriteLine(strLine)
                        End If
                    Next lngPage
                End Using
            Finally
                'Close the document.
                gPDDoc.Close()
            End Try
        Finally
            'Quit Acrobat whether or not extraction succeeded.
            gApp.Exit()
        End Try

        'Let the user know we're done.
        MsgBox("The text has been extracted from " & strFileName & ".", MsgBoxStyle.OkOnly, "Extract Complete")
    End Sub

End Module
Fernando
ASKER
Hi Fernando,
Thanks for such a quick and thorough post! Can you tell me how exactly I would pass the PDF file name on the command line? Do you mean in the CMD shell? Everything compiled, and I am now trying to test the program. Thanks again.
Thanks for such a quick and thorough post! Can you tell me how exactly I would pass the PDF file name on the command line? Do you mean in the CMD shell? Everything compiled, and I am now trying to test the program. Thanks again.
Hi Gvigorus;
If you are running the program from the Command Prompt shell window then do the following.
program-name pdf-filename-with-full-path
If you are running the program from the Visual Studio development environment (IDE), then do the following: from the IDE main menu click on Project -> Project-Name Properties... The next step depends on which version of VS you have:
Visual Studio 2005
Click on the Debug tab, then in the text box next to the label "Command line arguments" enter the name of the PDF file with its full path. Then you can just run the program.
Visual Studio 2003
On the left-hand panel click on Configuration Properties, then under that node click on Debugging; then on the right side, in the text area labeled "Command line arguments", enter the name of the PDF file with its full path. Then you can just run the program.
Fernando
If you are running the program from the Command Prompt shell window then do the following.
program-name pdf-filename-with-full-path
If you are running the program from the Visual Studio development environment (IDE), then do the following: from the IDE main menu click on Project -> Project-Name Properties... The next step depends on which version of VS you have:
Visual Studio 2005
Click on the Debug tab, then in the text box next to the label "Command line arguments" enter the name of the PDF file with its full path. Then you can just run the program.
Visual Studio 2003
On the left-hand panel click on Configuration Properties, then under that node click on Debugging; then on the right side, in the text area labeled "Command line arguments", enter the name of the PDF file with its full path. Then you can just run the program.
Fernando
ASKER
I'm using VB.NET 2005 Express edition.
I think the first time you probably meant for me to compile this project as a console app, not a Windows app. When I compile it as a Windows app, I just get a blank form on the screen.
I just tried compiling as Console App and got an error in line 96
System.Windows.Forms.Application.DoEvents()
It said that "Windows" was not a member of "System". Probably because this is a Console app.
I'll try doing what you said via VS 2005 Debug tab.
Thank you again!!!
I think the first time you probably meant for me to compile this project as a console app, not a Windows app. When I compile it as a Windows app, I just get a blank form on the screen.
I just tried compiling as Console App and got an error in line 96
System.Windows.Forms.Application.DoEvents()
It said that "Windows" was not a member of "System". Probably because this is a Console app.
I'll try doing what you said via VS 2005 Debug tab.
Thank you again!!!
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
Fernando, you got it to work! Great stuff. Thank you so much for your help!!!
-=G=-
-=G=-
Not a problem, Gvigorus. One other thing: when you add the code to a project you will also need to add a reference to the Acrobat library. You can do this by going to the Solution Explorer window -> right-click on the project name at the top of the window -> click on Add Reference... -> click on the COM tab -> then find the line that reads Acrobat. You may have other Adobe libraries with longer names; you want the one that is named just Acrobat.