Link to home
Start Free TrialLog in
Avatar of MikeDTE
MikeDTEFlag for United Kingdom of Great Britain and Northern Ireland

asked on

Processing text files to get data into Excel

I have written the following code to process a text file that has embedded fields of data contained within it.  The code works as long as the file name is 'text.txt'.  In a folder called 'D:\Data\Events I have a number of files, examples as follows:

Anna&Robert_20140722113644.txt
Charles Alexander_20140722141132.txt
David Tucker_20140722141132.txt
Mike_20140722141132.txt
Sue Christley_20140722141133.txt

I need to process each file in return to extract the data in the embedded fields to build up records in the spreadsheet.

The following code would become a called procedure and I need add code to work through each file and one processed copy the file to another 'Processed' folder once the data extraction is done.

Anybody got any ideas?
         
Sub GetEmailData()
  Dim FilePathAndName As String
  Dim TotalFile As String
  Dim FileNum As Long
  Dim Arr() As String
  Dim RegName As String
  Dim EventName As String
  Dim RegEmail As String
  Dim RegTelNo As String
  Dim RegMobile As String
  Dim NumAttending As String
  Dim Y As Long
    
  ' Get path and filename by whatever means you do now
  FilePathAndName = "D:\Data\Text.txt"
  
  ' Read entire file into TotalFile variable
  FileNum = FreeFile
  Open FilePathAndName For Binary As #FileNum
    TotalFile = Space(LOF(FileNum))
    Get #FileNum, , TotalFile
  Close #FileNum
  
  ' Get Event Name
  Arr = Split(TotalFile, vbCrLf & "#Event name:")
  Y = InStr(1, Arr(1), "#Name:") - 6
  EventName = Mid(Arr(1), 2, Y) & ", "

  ' Get Registrant Name
  Arr = Split(TotalFile, vbCrLf & "#Name:")
  Y = InStr(1, Arr(1), "#Email address:") - 6
  RegName = RTrim(Mid(Arr(1), 2, Y)) & ", "
  
  ' Get Registrant Email Address
  Arr = Split(TotalFile, vbCrLf & "#Email address:")
  Y = InStr(1, Arr(1), "#Contact telephone:") - 6
  RegEmail = RTrim(Mid(Arr(1), 2, Y)) & ", "
  
  ' Get Registrant Telephone Number
  Arr = Split(TotalFile, vbCrLf & "#Contact telephone:")
  Y = InStr(1, Arr(1), "#Mobile telephone:") - 6
  RegTelNo = RTrim(Mid(Arr(1), 2, Y)) & ", "
  
  ' Get Registrant Mobile Number
  Arr = Split(TotalFile, vbCrLf & "#Mobile telephone:")
  Y = InStr(1, Arr(1), "#Number in your party attending event:") - 6
  RegMobile = RTrim(Mid(Arr(1), 2, Y)) & ", "
  
  ' Get Number Attending
  Arr = Split(TotalFile, vbCrLf & "#Number in your party attending event:")
  Y = InStr(2, Arr(1), "#End Form") - 6
  NumAttending = RTrim(Mid(Arr(1), 2, Y))
  
  ' Let's see what we got
  MsgBox EventName & RegName & RegEmail & RegTelNo & RegMobile & NumAttending
         
End Sub

Open in new window

Avatar of aikimark
aikimark
Flag of United States of America image

This will iterate the txt files in the directory.
Sub GetEmailData()
  Dim FilePathAndName As String
  Dim TotalFile As String
  Dim FileNum As Long
  Dim Arr() As String
  Dim RegName As String
  Dim EventName As String
  Dim RegEmail As String
  Dim RegTelNo As String
  Dim RegMobile As String
  Dim NumAttending As String
  Dim Y As Long
   Const cPath as String = "D:\Data\"
  ' Get path and filename by whatever means you do now
  FilePathAndName = dir(cPath & "*.txt")
  Do Until Len(FilePathAndName) = 0
	  ' Read entire file into TotalFile variable
	  FileNum = FreeFile
	  Open FilePathAndName For Binary As #FileNum
		TotalFile = Space(LOF(FileNum))
		Get #FileNum, , TotalFile
	  Close #FileNum
	  
	  ' Get Event Name
	  Arr = Split(TotalFile, vbCrLf & "#Event name:")
	  Y = InStr(1, Arr(1), "#Name:") - 6
	  EventName = Mid(Arr(1), 2, Y) & ", "

	  ' Get Registrant Name
	  Arr = Split(TotalFile, vbCrLf & "#Name:")
	  Y = InStr(1, Arr(1), "#Email address:") - 6
	  RegName = RTrim(Mid(Arr(1), 2, Y)) & ", "
	  
	  ' Get Registrant Email Address
	  Arr = Split(TotalFile, vbCrLf & "#Email address:")
	  Y = InStr(1, Arr(1), "#Contact telephone:") - 6
	  RegEmail = RTrim(Mid(Arr(1), 2, Y)) & ", "
	  
	  ' Get Registrant Telephone Number
	  Arr = Split(TotalFile, vbCrLf & "#Contact telephone:")
	  Y = InStr(1, Arr(1), "#Mobile telephone:") - 6
	  RegTelNo = RTrim(Mid(Arr(1), 2, Y)) & ", "
	  
	  ' Get Registrant Mobile Number
	  Arr = Split(TotalFile, vbCrLf & "#Mobile telephone:")
	  Y = InStr(1, Arr(1), "#Number in your party attending event:") - 6
	  RegMobile = RTrim(Mid(Arr(1), 2, Y)) & ", "
	  
	  ' Get Number Attending
	  Arr = Split(TotalFile, vbCrLf & "#Number in your party attending event:")
	  Y = InStr(2, Arr(1), "#End Form") - 6
	  NumAttending = RTrim(Mid(Arr(1), 2, Y))
	  
	  ' Let's see what we got
	  MsgBox EventName & RegName & RegEmail & RegTelNo & RegMobile & NumAttending
	  FilePathAndName = Dir
  Loop
End Sub

Open in new window

It might be helpful if you posted a sample file, especially if your parsing is part of the problem
Avatar of MikeDTE

ASKER

Hi Aikimark

There is no problem with processing individual files.  I just need to wrap the code in a loop and pick-up each separate file in turn.

If it helps however - here is a sample file
Text.txt
My code should iterate the text files.  Please test it.
This might be a bit simpler code, using the regular expression object to do the parsing.
Sub GetEmailData()
	Dim FilePathAndName As String
	Dim TotalFile As String
	Dim FileNum As Long
'	Dim Arr() As String
	Dim RegName As String
	Dim EventName As String
	Dim RegEmail As String
	Dim RegTelNo As String
	Dim RegMobile As String
	Dim NumAttending As String
'	Dim Y As Long
	Const cPath as String = "D:\Data\"
	Dim oRE As Object, oMatches As Object, strData As String

	Set oRE = CreateObject("vbscript.regexp")
	oRE.Global = True
	oRE.Pattern = "#Event name: (.*?)\s+#Name: (.*?)\s+#Email address: (.*?)\s+#Contact telephone: (.*?)\s+#Mobile telephone: (.*?)\s+#Number in your party attending event: (.*?)\s+#End Form"

	' Get path and filename by whatever means you do now
	FilePathAndName = dir(cPath & "*.txt")
	Do Until Len(FilePathAndName) = 0
		' Read entire file into TotalFile variable
		FileNum = FreeFile
		Open FilePathAndName For Input As #FileNum
		TotalFile = Input(LOF(FileNum), #FileNum)
		Close #FileNum

		If oRE.test(strData) Then
			Set oMatches = oRE.Execute(strData)
			With oMatches(0)
				EventName = .submatches(0)
				RegName = .submatches(1)
				RegEmail = .submatches(2)
				RegTelNo = .submatches(3)
				RegMobile = .submatches(4)
				NumAttending = .submatches(5)
			End With
			
			'Debug.Print EventName, RegEmail, RegTelNo, RegMobile, NumAttending
			MsgBox "EventName: " & EventName & vbCr & "RegEmail: " & RegEmail & vbCr & "RegTelNo: " & RegTelNo & vbCr & "RegMobile: " & RegMobile & vbCr & "NumAttending: " & NumAttending
		End If
		
		FilePathAndName = Dir
	Loop
End Sub

Open in new window

For more information on regular expressions, read this article:
http:A_1336-Using-Regular-Expressions-in-Visual-Basic-for-Applications-and-Visual-Basic-6.html
Avatar of MikeDTE

ASKER

Hi Aikimark

Tried your latest code but it falls over on

Open FilePathAndName For Input As #FileNum

with Run-time error 53: File not found

Regards
Forgot the path:
Open cPath & FilePathAndName For Input As #FileNum

Open in new window

Avatar of MikeDTE

ASKER

Hi Aikimark

OK your first code posting now works with cPath added but your second code posting runs without error but produces nothing.  That is because strData is always null.  From Dim statement strData has no value stored to it so oRe.Test condition is not met.

Regards
Mike
Avatar of MikeDTE

ASKER

Hi Aikimark

The final goal is to populate Excel with the data - can you include some code to do that please?

Also the text files containing registration data will arrive right up until the event so we will not just run the code once.  Could the text file name be changed  - e.g.

Anna&Robert_20140722113644.txt
to
Anna&Robert_20140722113644

So that we can preserve the source data and not process files twice.

Alternatively we could move the files to subdirectory once data has been extracted.

Regards
Mike
The problem was introduced when migrating my test code into your code.  I used strData in my test code and didn't replace that variable name with TotalFile.  I also added Option Explicit in order to force the compiler to catch such things.  You should always run with Option Explicit in effect.
Option Explicit

Sub GetEmailData()
	Dim FilePathAndName As String
	Dim TotalFile As String
	Dim FileNum As Long
'	Dim Arr() As String
	Dim RegName As String
	Dim EventName As String
	Dim RegEmail As String
	Dim RegTelNo As String
	Dim RegMobile As String
	Dim NumAttending As String
'	Dim Y As Long
	Const cPath as String = "D:\Data\"
	Dim oRE As Object, oMatches As Object, strData As String

	Set oRE = CreateObject("vbscript.regexp")
	oRE.Global = True
	oRE.Pattern = "#Event name: (.*?)\s+#Name: (.*?)\s+#Email address: (.*?)\s+#Contact telephone: (.*?)\s+#Mobile telephone: (.*?)\s+#Number in your party attending event: (.*?)\s+#End Form"

	' Get path and filename by whatever means you do now
	FilePathAndName = dir(cPath & "*.txt")
	Do Until Len(FilePathAndName) = 0
		' Read entire file into TotalFile variable
		FileNum = FreeFile
		Open cPath & FilePathAndName For Input As #FileNum
		TotalFile = Input(LOF(FileNum), #FileNum)
		Close #FileNum

		If oRE.test(TotalFile) Then
			Set oMatches = oRE.Execute(TotalFile)
			With oMatches(0)
				EventName = .submatches(0)
				RegName = .submatches(1)
				RegEmail = .submatches(2)
				RegTelNo = .submatches(3)
				RegMobile = .submatches(4)
				NumAttending = .submatches(5)
			End With
			
			'Debug.Print EventName, RegEmail, RegTelNo, RegMobile, NumAttending
			MsgBox "EventName: " & EventName & vbCr & "RegEmail: " & RegEmail & vbCr & "RegTelNo: " & RegTelNo & vbCr & "RegMobile: " & RegMobile & vbCr & "NumAttending: " & NumAttending
		End If
		
		FilePathAndName = Dir
	Loop
End Sub

Open in new window

In this example, I delete the files I processed.  You could also use the Name statement to rename the files.
Option Explicit

Sub GetEmailData()
	Dim FilePathAndName As String
	Dim TotalFile As String
	Dim FileNum As Long
'	Dim Arr() As String
	Dim RegName As String
	Dim EventName As String
	Dim RegEmail As String
	Dim RegTelNo As String
	Dim RegMobile As String
	Dim NumAttending As String
'	Dim Y As Long
	Dim colFile As New Collection
	Dim vItem As Variant
	Const cPath as String = "D:\Data\"
	Dim oRE As Object, oMatches As Object, strData As String

	Set oRE = CreateObject("vbscript.regexp")
	oRE.Global = True
	oRE.Pattern = "#Event name: (.*?)\s+#Name: (.*?)\s+#Email address: (.*?)\s+#Contact telephone: (.*?)\s+#Mobile telephone: (.*?)\s+#Number in your party attending event: (.*?)\s+#End Form"

	' Get path and filename by whatever means you do now
	FilePathAndName = dir(cPath & "*.txt")
	Do Until Len(FilePathAndName) = 0
		' Read entire file into TotalFile variable
		FileNum = FreeFile
		Open cPath & FilePathAndName For Input As #FileNum
		TotalFile = Input(LOF(FileNum), #FileNum)
		Close #FileNum

		If oRE.test(TotalFile) Then
			Set oMatches = oRE.Execute(TotalFile)
			With oMatches(0)
				EventName = .submatches(0)
				RegName = .submatches(1)
				RegEmail = .submatches(2)
				RegTelNo = .submatches(3)
				RegMobile = .submatches(4)
				NumAttending = .submatches(5)
			End With
			
			'Debug.Print EventName, RegEmail, RegTelNo, RegMobile, NumAttending
			MsgBox "EventName: " & EventName & vbCr & "RegEmail: " & RegEmail & vbCr & "RegTelNo: " & RegTelNo & vbCr & "RegMobile: " & RegMobile & vbCr & "NumAttending: " & NumAttending
                        colFile.Add cPath & FilePathAndName
		End If
		
		FilePathAndName = Dir
	Loop
    For Each vItem In colFile
        KillFile vItem
    Next
End Sub

Open in new window

Avatar of MikeDTE

ASKER

Hi Aikimark

The killfile vitem statement does not run because vItem and colFile are empty/undefined

I am trying to modify the For ... Next loop to use the Name statement as you have suggested.  I assume vItem will contain a file name so I will take a Mid to remove .txt

so "Anna&Robert_20140722113644.txt"  becomes "Anna&Robert_20140722113644"

then store the modified file name in a string and then use

vItem.Name = strFile

Is that right?

Regards
Mike
if colFile is empty, that means that none of the text files in the folder met the regex pattern.  I based the pattern off the sample file you posted and the original parsing code.

You would change the name of the file string for the new name by removing the last 4 characters (.txt)
Example:
For Each vItem In colFile
    Name vItem As Left(vItem, Len(vItem)-4)
Next

Open in new window

Avatar of MikeDTE

ASKER

Not doubting your expertise but if I set a breakpoint on the For each vItem In colFile and check values on running the procedure I get

vItem = empty
colFile=<Object variable or With block variable not set>

Checking the folder I have 11 *.txt files in the directory 'D:\Data'
1. vItem will not have a value until after the For Each statement
2. set a breakpoint on the colFile.Add cPath & FilePathAndName line.  When you run your code, does this statement get executed?
3. make sure you have an Option Explicit statement in your General Declarations section

If you don't observe the breakpoint, step through your code with F8.

Was your sample txt file representative of the files in your tests?
Avatar of MikeDTE

ASKER

Aha!

I had been adding your latest version changes to a modified version of earlier code you sent me.  The modifications I made had reflected the need to LTRim all of the submatches because of the way some people complete the entry form.  
Also I am using a subfolder of the D:\Data (D:\Data\Events\RiverStroll14).  So I copied in your changes but omitted the colFile.Add cPath & FilePathAndName line.  Sorry.

I did however do the rest incl. Option Explicit

Now added the line and everything on the rename works beautifully.

Only one thing now remains - give me a clue on how I get the extracted data into a spreadsheet please.  I suppose I could store all the extracts in a new txt file in CSV format and then import but is there a more automatic and elegant way of doing it please? Thanks.
Assuming that your code will not invoke msgbox for every file, you would most conveniently update your worksheet either as you extract the data or in a single operation, after moving the data into a 2D array (one column for every field and one row for every item in the collection).

I describe the array-to-worksheet technique in this article:
Fast Data Push to Excel:  http:A_2253.html

If you update the workbook as you parse the text, you should suspend screen updating during the process for best performance.
Avatar of MikeDTE

ASKER

Thanks - will follow your advice and suspend any screen updating.  I will look at the Fast Data Push to Excel in the morning as I'm off now to play Boules.  Thanks for all your help.

Regards
MIke
or Bocce
On a rainy day play Love Letter with three of your friends
Avatar of MikeDTE

ASKER

Heard of Bocce but not Love Letter - assume it's either a water-sport (rain reference) or is it played indoors?
card game for rainy days

I think Bocce is the same game as Boules
Avatar of MikeDTE

ASKER

Hi aikimark

I'm going around in circles here

I have successfully created a 2D array using Arr(9,5) where there are 10 row of 6 columns.

The array rows should correspond to the number of files in the data directory.  SO I count the *.txt files using:

Dim FileSys, DataFolder, DataFiles
Dim Numfiles as Long

Set FileSys = CreateObject("Scripting.FileSystemObject")
Set DataFolder = FileSys.GetFolder(cPath)
Set DataFiles = DataFolder.Files
   
NumFiles = 0
   
For Each DataFiles In DataFolder
     If LCase(FileSys.getextensionname(DataFiles)) = "*.txt" Then
         NumFiles = NumFiles + 1
   
     End If
       
Next

But I get Run-time error 438 - Object doesn't support this property or method

on Line For Each DataFiles In DataFolder

Can you help please?
This will correctly count the number of txt files in your folder
Dim FileSys as object, DataFolder as object, DataFiles as object
Dim Numfiles as Long

Set FileSys = CreateObject("Scripting.FileSystemObject")
Set DataFolder = FileSys.GetFolder(cPath)
'Set DataFiles = DataFolder.Files
    
NumFiles = 0
    
For Each DataFiles In DataFolder.Files
     If LCase(FileSys.getextensionname(DataFiles)) = "txt" Then
         NumFiles = NumFiles + 1
     End If
Next

Open in new window

However, I don't understand why you are doing this.  You are already iterating the txt files with the code you now have that uses the Dir function.  Plus, this loop might not correspond with the files you actually process.
In this case, it might be easier to push individual row data.  
Note: This example still kills the processed files.
Option Explicit

Sub GetEmailData()
	Dim FilePathAndName As String
	Dim TotalFile As String
	Dim FileNum As Long
'	Dim Arr() As String
	Dim RegName As String
	Dim EventName As String
	Dim RegEmail As String
	Dim RegTelNo As String
	Dim RegMobile As String
	Dim NumAttending As String
'	Dim Y As Long
	Dim colFile As New Collection
	Dim vItem As Variant
	Const cPath as String = "D:\Data\"
	Dim oRE As Object, oMatches As Object, strData As String
	Dim rng As Range

	Set oRE = CreateObject("vbscript.regexp")
	oRE.Global = True
	oRE.Pattern = "#Event name: (.*?)\s+#Name: (.*?)\s+#Email address: (.*?)\s+#Contact telephone: (.*?)\s+#Mobile telephone: (.*?)\s+#Number in your party attending event: (.*?)\s+#End Form"

	' Get path and filename by whatever means you do now
	FilePathAndName = dir(cPath & "*.txt")
	Do Until Len(FilePathAndName) = 0
		' Read entire file into TotalFile variable
		FileNum = FreeFile
		Open cPath & FilePathAndName For Input As #FileNum
		TotalFile = Input(LOF(FileNum), #FileNum)
		Close #FileNum

		If oRE.test(TotalFile) Then
			Set oMatches = oRE.Execute(TotalFile)
			With oMatches(0)
				EventName = .submatches(0)
				RegName = .submatches(1)
				RegEmail = .submatches(2)
				RegTelNo = .submatches(3)
				RegMobile = .submatches(4)
				NumAttending = .submatches(5)
			End With
			
			'Debug.Print EventName, RegEmail, RegTelNo, RegMobile, NumAttending
			'MsgBox "EventName: " & EventName & vbCr & "RegEmail: " & RegEmail & vbCr & "RegTelNo: " & RegTelNo & vbCr & "RegMobile: " & RegMobile & vbCr & "NumAttending: " & NumAttending
			colFile.Add array(EventName, RegEmail, RegTelNo, RegMobile, NumAttending, cPath & FilePathAndName)
		End If
		
		FilePathAndName = Dir
	Loop
'push to Excel
        Set rng = ActiveSheet.Range(ActiveSheet.Cells(1, 1), ActiveSheet.Cells(1, 6))
	Application.ScreenUpdating = False
	For Each vitem In colFiles
		rng.Value = vitem	'file name is not pushed because rng is too narrow
		KillFile vItem(6)
		Set rng = rng.Offset(1)
	Next
	Application.ScreenUpdating = True
End Sub

Open in new window

Avatar of MikeDTE

ASKER

I need to know how may files there are to process to dimension the 2D array.  The count seems reliable - I don't understand which it wouldn't be though.

OK - the Runtime error 438 has gone but I now get a Compile error: Constant expression required on the following line of code

 ' Dimension array
 Dim Arr(NumFiles - 1, 6)
ASKER CERTIFIED SOLUTION
Avatar of aikimark
aikimark
Flag of United States of America image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Avatar of MikeDTE

ASKER

This basically works but I have added trimming, some apostrophes to make numeric data a string and attempted to resolve  

Option Explicit

Sub GetEmailData()
    Dim FilePathAndName As String
    Dim TotalFile As String
    Dim FileNum As Long
    Dim RegName As String
    Dim EventName As String
    Dim RegEmail As String
    Dim RegTelNo As String
    Dim RegMobile As String
    Dim NumAttending As String
    Dim colFile As New Collection
    Dim vItem As Variant
    Const cPath As String = "D:\Data\Events\RiverStroll\"
    Dim oRE As Object, oMatches As Object, strData As String
    Dim rng As Range
    Dim lngRow As Long, lngCol As Long

    Set oRE = CreateObject("vbscript.regexp")
    oRE.Global = True
    oRE.Pattern = "#Event name: (.*?)\s+#Name: (.*?)\s+#Email address: (.*?)\s+#Contact telephone: (.*?)\s+#Mobile telephone: (.*?)\s+#Number in your party attending event: (.*?)\s+#End Form"

    ' Get path and filename by whatever means you do now
    FilePathAndName = Dir(cPath & "*.txt")
    
    Do Until Len(FilePathAndName) = 0
        ' Read entire file into TotalFile variable
        FileNum = FreeFile
        Open cPath & FilePathAndName For Input As #FileNum
        TotalFile = Input(LOF(FileNum), #FileNum)
        Close #FileNum

        If oRE.test(TotalFile) Then
            Set oMatches = oRE.Execute(TotalFile)
            
            With oMatches(0)
                EventName = LTrim(RTrim(.submatches(0)))
                RegName = LTrim(RTrim(.submatches(1)))
                RegEmail = LTrim(RTrim(.submatches(2)))
                RegTelNo = "'" & LTrim(RTrim(.submatches(3)))
                RegMobile = "'" & LTrim(RTrim(.submatches(4)))
                NumAttending = LTrim(RTrim(.submatches(5)))
            
            End With
            
            'MsgBox "EventName: " & EventName & vbCr & "RegEmail: " & RegEmail & vbCr & "RegTelNo: " & RegTelNo & vbCr & "RegMobile: " & RegMobile & vbCr & "NumAttending: " & NumAttending
            colFile.Add Array(EventName, RegEmail, RegTelNo, RegMobile, NumAttending, cPath & FilePathAndName)
        
        End If
        
        FilePathAndName = Dir
    Loop
    
    ' Push into Excel
    ReDim vData(1 To colFile.Count, 0 To 6)
    lngRow = 1
    For Each vItem In colFile
        For lngCol = 0 To 5
            vData(lngRow, lngCol) = vItem(lngCol)
        Next
        
        'Kill vItem(6) - no do not delete
        
        ' Rename file
        'Name vItem As Left(vItem, Len(vItem) - 4)
        
        lngRow = lngRow + 1
    
    Next
    
    Set rng = ActiveSheet.Range(ActiveSheet.Cells(1, 1), ActiveSheet.Cells(colFile.Count, 6))
    rng.Value = vData

End Sub

Open in new window


The only bit that now doesn't work is the rename file in the last For .. Next loop
At this point, the iterator variable is (now) an array, with the last item of each array being the file path and name (path\name)
       ' Rename file
       Name vItem(6) As Left(vItem(6), Len(vItem(6)) - 4)

Open in new window

Avatar of MikeDTE

ASKER

Fantastic help with this - I needed a quick solution in doing something a bit out of my usual scope.