Batch processing of .msg files

I work at a law firm and I have a large batch of .msg files that I need to process.  I need to break each file into separate parts, while naming them in a way so that you can tell that the attachments are part of the original email (e.g. convert email_1.msg into--> email1.pdf, email1_attachment1.pdf, email1_attachment2.pdf, etc.).  So far I have a Macro that will print the emails, and attachments (using Outlook 2003), and I am using "NovaPDF" to print them as pdf's.  The problem is that the file name that Outlook gives the email is "outbind_1", and there is no way to tell which message the attachments come from once the batch is printed.  If I could get the name to come from the subject line, or header or something that would be helpful, but you get the point.  Also I am not married to this whole idea either if someone has a better one- in fact, I'd rather leave Outlook out of the equation if I could, but I don't know any other way to break down .msg's like that.
I've attached the vba script I'm using for the macro (It's not mine -Thank you "BlueDevilFan"!),
Private Declare Function ShellExecute Lib "shell32.dll" _
  Alias "ShellExecuteA" (ByVal hwnd As Long, ByVal lpOperation As String, _
  ByVal lpFile As String, ByVal lpParameters As String, _
  ByVal lpDirectory As String, ByVal nShowCmd As Long) As Long
Sub PrintMessagesAndAttachments()
    Dim objFSO As Object, _
        objFolder As Object, _
        objFile As Object, _
        objTempFolder As Object, _
        olkMsg As Object, _
        olkAttachment As Object
    Set objFSO = CreateObject("Scripting.FileSystemObject")
    Set objTempFolder = objFSO.GetSpecialFolder(2)
    'Change the folder path on the following line'
    Set objFolder = objFSO.GetFolder("C:\eeTesting")
    For Each objFile In objFolder.Files
        If objFSO.GetExtensionName(objFile.Path) = "msg" Then
            ShellExecute 0&, "print", objFile.Path, 0&, 0&, 0&
            Set olkMsg = Application.CreateItemFromTemplate(objFile.Path)
            For Each olkAttachment In olkMsg.Attachments
                olkAttachment.SaveAsFile objTempFolder & "\" & olkAttachment.FILENAME
                ShellExecute 0&, "print", objTempFolder & "\" & olkAttachment.FILENAME, 0&, 0&, 0&
        End If
    Set objFSO = Nothing
    Set objFolder = Nothing
    Set objFile = Nothing
    Set objTempFolder = Nothing
    Set olkMsg = Nothing
    Set olkAttachment = Nothing
End Sub

Open in new window

Who is Participating?
Robberbaron (robr)Commented:
im with akimark... we use PDF995, which can be controlled by ini file also. though not free its cheap & generally reliable.

You are currently printing before opening the msg, so your code has no idea of the subject.  I would be naming the output with the datereceived followed by subject.  That way they are easily sorted.

And you really want all attachments as PDF ?? Many wont print very well eg an excel spreadsheet, powerpoint ?

Ive moded your code a bit to show intent.  just psuedo code generally and attached my code to print a HTML file using PDF995. I expect that the control file for BullZip is similar.  The PrintHTML opens the file in a webbroser and uses ExecWB to print. Probably similar to the ShellExec but it allows for a wait loop while print and Pdf processing occurs.
Sub pdfWrite_pdf995(reportname As String, inputtype As String, Optional DocInfo_title As Variant = "zzz report", Optional DocInfo_Subject As Variant = "zzz report", Optional DocInfo_Keywords As Variant = "")

    ' Prints a report file to PDF995 to create a pdf file from the report.
    ' Input parameters are the filename of the existing report
    ' The filetype of the report
    ' and an optional criteria for the report
    ' Be sure to check that the "Generating PDF CS" setting in pdfsync.ini is set to 0
    ' when pdf995 is idle. This codes uses that as a completion flag as it seems to be
    ' the most reliable indication that PDF995 is done writing the pdf file.
    On Error GoTo PdfWError
    '--- save current default printer -----
    Dim olddefprinter As String, lLen As Long
    Dim sNameBuff As String
    ErrLoc = 101
    'remember current default printer
    GetDefaultPrinter vbNullChar, lLen
    sNameBuff = Space$(lLen)
    GetDefaultPrinter sNameBuff, lLen
    olddefprinter = left$(sNameBuff, _
                    InStr(sNameBuff, vbNullChar) - 1)
    ErrLoc = 102
    'change the default printer prior to calling the
    'ExecWB function.
    If olddefprinter <> Trim$("PDF995") Then
        SetDefaultPrinter "PDF995"
    End If
    ErrLoc = 102

    '---configure PDF995 -----------
    Dim syncfile As String, maxwaittime As Long
    Dim iniFileName As String
    Dim outputFile As String, x As Long, destpath As String
    Dim tmpoutputfile As String, tmpAutoLaunch As String
    Dim tmpDocInfo_Title As String, tmpDocInfo_Subject As String, tmpDocInfo_Keywords As String
    Dim tmpCombineDocuments As Integer, tmpCombineLast As Integer, tmpCombineLastPreference As Integer
    ' set the location of the PDF995.ini and the pdfsync files
    iniFileName = PDF995_Path & "\res\pdf995.ini"
    syncfile = PDF995_Path & "\res\pdfsync.ini"
    ' build the output file name from the path parameter and the report name
    destpath = GetFilepathOnly(reportname)
    ErrLoc = 104
    If Mid$(destpath, Len(destpath), 1) <> "\" Then destpath = destpath & "\"
    outputFile = destpath & GetFileNameOnly(reportname) & ".pdf"
    ErrLoc = 105
    ' PDF995 operates asynchronously. We need to determine when it is done so we can
    ' continue. This is done by creating a file and having PDF995 delete it using the
    ' ProcessPDF parameter in its ini file which runs a command when it is complete.
    'MsgBox "pdfw:=" & iniFileName & "=" & vbCrLf & "---" & outputfile & "="
    ' save current settings from the PDF995.ini file
    tmpoutputfile = ReadINIfile("PARAMETERS", "Output File", iniFileName)
    sNameBuff = ReadINIfile("PARAMETERS", "Autolaunch", iniFileName)
    If IsNull(sNameBuff) Then
        tmpAutoLaunch = 1
        tmpAutoLaunch = Val(sNameBuff)
    End If
    tmpDocInfo_Title = ReadINIfile("PARAMETERS", "DocInfo Title", iniFileName)
    ErrLoc = 1051
    tmpDocInfo_Subject = ReadINIfile("PARAMETERS", "DocInfo Subject", iniFileName)
    tmpDocInfo_Keywords = ReadINIfile("PARAMETERS", "DocInfo Keywords", iniFileName)
    sNameBuff = ReadINIfile("PARAMETERS", "Combine Documents", iniFileName)
    If IsNull(sNameBuff) Then
        tmpCombineDocuments = 0
        tmpCombineDocuments = Val(sNameBuff)
    End If
    ErrLoc = 1052
    sNameBuff = ReadINIfile("PARAMETERS", "Combine Last", iniFileName)
    If IsNull(sNameBuff) Then
        tmpCombineLast = 0
        tmpCombineLast = Val(sNameBuff)
    End If
    sNameBuff = ReadINIfile("PARAMETERS", "Combine Last Preference", iniFileName)
    If IsNull(sNameBuff) Then
        tmpCombineLastPreference = 0
        tmpCombineLastPreference = Val(sNameBuff)
    End If

    ErrLoc = 107
    ' remove previous pdf if it exists
    On Error Resume Next
    Kill outputFile
    On Error GoTo Cleanup
    ErrLoc = 108
    ' setup new values in PDF995.ini
    x = WritePrivateProfileString("PARAMETERS", "Output File", outputFile, iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "AutoLaunch", "0", iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "DocInfo Title", CStr(DocInfo_title), iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "DocInfo Subject", CStr(DocInfo_Subject), iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "DocInfo Keywords", CStr(DocInfo_Keywords), iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "Size", "a4       8.3   11.7", iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "Combine Documents", "0", iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "Combine Last", "0", iniFileName)
    ErrLoc = 109
    'print the report to the default printer
    Select Case inputtype
        Case "HTML"
            PrintHtml "pdf995", reportname
        Case "DOC"
        Case Else
            'DoCmd.OpenReport reportname, acViewNormal, , strcriteria
    End Select
        ErrLoc = 110
    ' cleanup delay to allow PDF995 to finish up. When flagfile is nolonger present, PDF995 is done.
    Sleep 10000
    maxwaittime = 30000 'If pdf995 isn't done in 30sec, quit anyway
    Do While ReadINIfile("PARAMETERS", "Generating PDF CS", syncfile) = "1" And maxwaittime > 0
        Sleep 10000
        maxwaittime = maxwaittime - 10000
        ErrLoc = 111
    ' restore the original default printer and the PDF995.ini settings
    Waite 1
    x = WritePrivateProfileString("PARAMETERS", "Output File", tmpoutputfile, iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "AutoLaunch", tmpAutoLaunch, iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "Launch", "", iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "DocInfo Title", tmpDocInfo_Title, iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "DocInfo Subject", tmpDocInfo_Subject, iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "DocInfo Keywords", tmpDocInfo_Keywords, iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "Combine Documents", Str$(tmpCombineDocuments), iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "Combine Last", Str$(tmpCombineLast), iniFileName)
    x = WritePrivateProfileString("PARAMETERS", "Combine Last Preference", Str$(tmpCombineLastPreference), iniFileName)

    ErrLoc = 112
    On Error Resume Next
    'set default printer back to original
    If olddefprinter <> Trim$("PDF995") Then
        SetDefaultPrinter olddefprinter
    End If
        ErrLoc = 113
    Exit Sub
    MsgBox "PDFWrite error" & vbCrLf & "Loc=" & ErrLoc & vbCrLf & Err.Description, vbCritical, "Error Trap"
End Sub

Open in new window

Set objFolder = objFSO.GetFolder("C:\eeTesting")
    For Each objFile In objFolder.Files
        If objFSO.GetExtensionName(objFile.Path) = "msg" Then
            Set olkMsg = Application.CreateItemFromTemplate(objFile.Path)
            msgSubject = olkMsg.Subject
            dteReceived = olkMsg.Received  'or similar
            sOutput = format(dteReceived,"yyyymmdd-HHnnss") & "_" & msgsubject
            'save default printer   <<<<
            'set default printer to PDF995
            'print message to default printer
            'ShellAndWait  <<<May need to do this as PDF creation can take time and print is a synchronous method
            'set output file = sOutput

            For Each olkAttachment In olkMsg.Attachments
               svname = objTempFolder & "\" soutput & "_" & olkAttachment.FILENAME
                olkAttachment.SaveAsFile svName
                ShellExecute 0&, "print", svName, 0&, 0&, 0&
            'reset default printer
        End If

Open in new window

If you print one of these items manually from within Outlook, what is the default title?

Note: I'm not convinced that shellexecute is the best method for initiating the print operation.

With PDF-generation applications, like NovaPDF you can usually set the default values programmatically.  You might want to look at their toolkit.

I use Bullzip, which has an ActiveX object interface and I can create/alter an INI file (in a particular directory) before launching the print operation.
IT_Tech54Author Commented:
HI guys, I am swamped today at work, and my coworker is out for the day (there are only the two of us on the I.T. staff), so I don't have time to look at this code closely right now, but I just wanted to let you know that I really appreciate your help, and I will be getting back to you later today.
Just to clarify a couple things; when I print one of the .msg's manually it sometimes names it "Outbind_" and sometimes "Memo Style"- not sure why it does one or the other, but I'll investigate more later.  Also, it is not mandatory that the docs are pdf.  This project was sort of dumped on me last minute and it is a rush (evidently the lawyers are already in violation for producing the docs late), so this was the first idea I came up with, but I am open to change it.
   The final result needed is this: I need all of the docs (there are tens of thousands total, of different types) broken down into single page images, to be used in a makeshift document management system.  Most of the docs are already pdf's, so I already have a system that I've been using to break down pdf's into single page .png images named after the original doc, this is why my first instinct was to use pdf's, and then just feed them into the current system with the rest of the docs.

  I get what you are saying about spreadsheets (and ppt's, etc., etc.) not printing well, but usually using the default page setup of the original doc works, since most of these docs were meant to be printed anyway- in law, everything is printed at least once and added to a "hard" or "Paper" file.  I don't know of any firms that have gone completely paperless

So far I like the direction you are going, with the pdf writers that have an .ini file that can be modified.  

Again, I really appreciate your help!
Thank you.

Cloud Class® Course: Ruby Fundamentals

This course will introduce you to Ruby, as well as teach you about classes, methods, variables, data structures, loops, enumerable methods, and finishing touches.

You should avoid producing multiple PDFs for each document (for many reasons).  Each PDF should be associated with a document, not a page in a document.

Since these are different document types, I understand the use of ShellExecute.  You want Windows to use the program associated with the different file types.  Your client (employer?) is learning a hard lesson about implementing document management on-the-cheap.

Once you get this working, you can run multiple processes simultaneously, starting each process on a different CORE or different PC.  You should look for ways to partition the documents (by user, by date range, by document type, by filing type, by client, etc.)
Robberbaron (robr)Commented:
if your idea is to make the documents impractical to discover, then your method of saving as individual images makes some sense i guess.

but saving a email as a pdf, with all its attachments in pdf form as following pages is a far easier way to present the data.

but if your issue is time, buy a FPP version of Outlook, burn all the MSG files to DVDs and hand the lot over for discovery as is. (ive actually had to do something like this as we archive project correspondence to PST files, only openable by Outlook naturally enough.)
IT_Tech54Author Commented:
 You are absolutely right about the lesson on doc management, but the fact is that this firm rarely gets a case like this, and they don't want to shell out the $10K+ for a one time thing.  Also when you work for Attorneys (you are right- they are not clients, they are my employer), you can't always do what is best from an I.T. standpoint, sometimes you have to just find a means to a specific end.  The bottom line is that the Attorneys find it easiest, with the current software they are using, to browse through single page images of these docs (using thumbnails) in order to pull out the pages that they need to produce to the opposing counsel. I proposed all types of indexing, databases, ocr, DT search, etc, but it just seems like they love doing things the hard way (lol), but like I said, it doesn't matter what I think, if that is how they want it, then that's how they get it!  Once they pull\arrange the single page docs the way they want them, I combine them back into a few reasonably sized docs, then apply a Bates number and that is how they are produced.  I've got everything covered except for the .msg's, so once I get these processed in a way that makes them happy, it will be back to business as usual.

robberbaron, I'm getting a syntax error on line 16 of the snippet you attached for the Outlook macro, any ideas what's wrong?  I apologize if it's something simple that I'm missing, I'm not too familiar with vb script.
good luck.  I don't think I can help you much further than this.  I think RobberBaron has the best idea (PST export).  Your lawyers can then delete items.

Discovery is a real bitch.
IT_Tech54Author Commented:
Okay, thanks for the effort- I really doappreciate it.
Robberbaron (robr)Commented:
My code is vb6, not vbs. Probably have to remove all the declaration types as vbs is all variants.

Make it VBA compliant and you should be OK.  This is an Outlook automation runtime environment.
IT_Tech54Author Commented:
Okay, got it. I think this is gonna work for me. I'll let you know.
Robberbaron (robr)Commented:
this now compiles in Excel 07 but obviously doesn't work as you need to run in OL.

need to install PDF995 to "C:\PDF995" or change the const to the installed location.
Ive moded the code to be VBA.

Ive added the definitions for API calls and my ancient path split routines that could do with a rewrite.

try it as a base for your app.
I think robberbaron's http:#35188711 comment should be accepted as the solution (400 pts) and aikimark's http:#35169852 comment should be the assisted solution (100 pts).
IT_Tech54Author Commented:
I apologize for taking so long to respond, but Robberbarron's solution did work for me, with some additional code added to it and I'd like to elaborate in order to make this solution complete for the sake of the community.
  I had to add a "ShellAndWait" function that paused the script after processing each message.  Without it the script keeps running as the docs are printing, which ends up adding files to the print queue haphazardly, causing the attachments to end up far removed from the email they came from.

Here is the code I used for the "ShellAndWait" function:
Option Explicit

Private Declare Function OpenProcess Lib "kernel32" _
(ByVal dwDesiredAccess As Long, ByVal bInheritHandle As Long, _
ByVal dwProcessId As Long) As Long

Private Declare Function GetExitCodeProcess Lib "kernel32" _
(ByVal hProcess As Long, lpExitCode As Long) As Long

Private Const STATUS_PENDING = &H103&

Public Function ShellandWait(ExeFullPath As String, _
Optional TimeOutValue As Long = 0) As Boolean
    Dim lInst As Long
    Dim lStart As Long
    Dim lTimeToQuit As Long
    Dim sExeName As String
    Dim lProcessId As Long
    Dim lExitCode As Long
    Dim bPastMidnight As Boolean
    On Error GoTo ErrorHandler

    lStart = CLng(Timer)
    sExeName = ExeFullPath

    If TimeOutValue > 0 Then
        If lStart + TimeOutValue < 86400 Then
            lTimeToQuit = lStart + TimeOutValue
            lTimeToQuit = (lStart - 86400) + TimeOutValue
            bPastMidnight = True
        End If
    End If

    lInst = Shell(sExeName, vbMinimizedNoFocus)
lProcessId = OpenProcess(PROCESS_QUERY_INFORMATION, False, lInst)

        Call GetExitCodeProcess(lProcessId, lExitCode)
        If TimeOutValue And Timer > lTimeToQuit Then
            If bPastMidnight Then
                 If Timer < lStart Then Exit Do
                 Exit Do
            End If
    End If
    Loop While lExitCode = STATUS_PENDING
    ShellandWait = True
    ShellandWait = False
    Exit Function
End Function

Open in new window


Please accept RobberBaron's comment as the solution.
IT_Tech54Author Commented:
Without the added function, even though the script did print and name the docs properly, they ended up being separated from the email, which was not acceptable for what I was doing.
Question has a verified solution.

Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

All Courses

From novice to tech pro — start learning today.