• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 2707
  • Last Modified:

VBScript to remove Excel Duplicates

Hi,

Asking for assistance with this script to add duplicate removal functionality.  The goal would be to remove the entire row if any duplicates occur within the "Account" field.  

The most important criteria though is that the "Levy" field continue to drop anything below the 2500 threshold, which it is doing, so any duplicate removing logic should not interfere with that.

Had to great experts assist previously who did wonderful getting it to this point.

Thanks

J
' Define needed constants
Const ForReading = 1
Const ForWriting = 2
Const TriStateUseDefault = -2
Const xlSaveChanges = 1 'Excel Save Changes
Const xlAscending = 1 'Excel Sort Ascending
Const xlDescending = 2 'Excel Sort Decending
Const xlYes = 1 'Excel Header Row Exists

cTab = Chr(9)

' Get input and output file names from command line parms
If (WScript.Arguments.Count > 0) Then
    sInfile = WScript.Arguments(0)
Else
    WScript.Echo "No input filename specified."
    WScript.Quit
End If
If (WScript.Arguments.Count > 1) Then
    sOutfile = WScript.Arguments(1)
Else
    WScript.Echo "No output filename specified."
    WScript.Quit
End If

'Create Excel Object
Set objExcel = CreateObject("Excel.Application")
objExcel.DisplayAlerts = False 'Disable Overwrite Prompts
objExcel.Visible = True 'Hides Excel window

'Create Excel worksheet and add header row
objExcel.Workbooks.Add()
objExcel.Cells(1, 1) = "Account"
objExcel.Cells(1, 2) = "Year"
objExcel.Cells(1, 3) = "LevyBalance"
objExcel.Cells(1, 4) = "Owner"
objExcel.Cells(1, 5) = "Address2"
objExcel.Cells(1, 6) = "Address3"
objExcel.Cells(1, 7) = "Address4"
objExcel.Cells(1, 8) = "City"
objExcel.Cells(1, 9) = "State"
objExcel.Cells(1, 10) = "ZipCode"
intRow = 2 'Set first row to add data

' Create file system object
Set oFSO = CreateObject("Scripting.FileSystemObject")

' Read entire input file into a variable and close it
Set oInfile = oFSO.OpenTextFile(sInfile, ForReading, False, TriStateUseDefault)

Do While Not oInfile.AtEndOfStream
    sLine = oInfile.ReadLine
    Account = Trim(Mid(sLine, 1, 22))
    TYear = Trim(Mid(sLine, 23, 4))
    Over65 = Trim(Mid(sLine, 65, 1))
    Veteran = Trim(Mid(sLine, 66, 1))
    Disabled = Trim(Mid(sLine, 67, 1))
    DatePaid = Trim(Mid(sLine, 69, 8))
    LevyBalance = Trim(Mid(sLine, 87, 11))
    BankRupt = Trim(Mid(sLine, 139, 1))
    Owner = Trim(Mid(sLine, 202, 40))
    Address2 = Trim(Mid(sLine, 242, 40))
    Address3 = Trim(Mid(sLine, 282, 40))
    Address4 = Trim(Mid(sLine, 322, 40))
    City = Trim(Mid(sLine, 362, 24))
    State = Trim(Mid(sLine, 386, 2))
    ZipCode = Mid(sLine, 388, 5) & "-" & Mid(sLine, 393, 4)
           
    If (TYear <> "" And CLng(TYear) < 2010) And Over65 = "" And Veteran = "" And Disabled = "" And DatePaid = "" And ( LevyBalance <> "" And CLng(LevyBalance) >= 2500 ) And BankRupt = "" Then
        objExcel.Cells(intRow, 1) = Account
        objExcel.Cells(intRow, 2) = TYear
        objExcel.Cells(intRow, 3) = LevyBalance
        objExcel.Cells(intRow, 4) = Owner
        objExcel.Cells(intRow, 5) = Address2
        objExcel.Cells(intRow, 6) = Address3
        objExcel.Cells(intRow, 7) = Address4
        objExcel.Cells(intRow, 8) = City
        objExcel.Cells(intRow, 9) = State
        objExcel.Cells(intRow, 10) = ZipCode
        intRow = intRow + 1
    End If
Loop

'Format and Sort Excel workbook

Set objSheet = objExcel.ActiveWorkbook.ActiveSheet
objSheet.Columns("A:A").NumberFormat = "0"
Set objRange = objSheet.UsedRange
objRange.EntireColumn.AutoFit()
For iW = objRange.Column To objRange.Columns.Count
    colWidth = objSheet.Columns(iW).ColumnWidth
    objSheet.Columns(iW).ColumnWidth = colWidth + 5
Next
        
'Save Excel Workbook
objExcel.ActiveWorkbook.SaveAs(sOutfile)
'objExcel.ActiveWorkbook.Close xlSaveChanges
'objExcel.Quit
WScript.Echo "Script Finished"

' Cleanup and end
oInfile.Close
Set oInfile = Nothing
Set oFSO = Nothing

Open in new window

0
jon1966
Asked:
jon1966
  • 5
  • 4
1 Solution
 
sungenwangCommented:
can you attach a sample file?
sew
0
 
jon1966Author Commented:
Sure, here you go, thank you.
sample.txt
0
 
sungenwangCommented:
OK. I have added a dictionary object to your code that will dicard any duplicate "Account" field.
sew

' Define needed constants
Const ForReading = 1
Const ForWriting = 2
Const TriStateUseDefault = -2
Const xlSaveChanges = 1 'Excel Save Changes
Const xlAscending = 1 'Excel Sort Ascending
Const xlDescending = 2 'Excel Sort Decending
Const xlYes = 1 'Excel Header Row Exists

cTab = Chr(9)

' Get input and output file names from command line parms
If (WScript.Arguments.Count > 0) Then
    sInfile = WScript.Arguments(0)
Else
    WScript.Echo "No input filename specified."
    WScript.Quit
End If
If (WScript.Arguments.Count > 1) Then
    sOutfile = WScript.Arguments(1)
Else
    WScript.Echo "No output filename specified."
    WScript.Quit
End If

'Create Excel Object
Set objExcel = CreateObject("Excel.Application")
objExcel.DisplayAlerts = False 'Disable Overwrite Prompts
objExcel.Visible = True 'Hides Excel window

'Create Excel worksheet and add header row
objExcel.Workbooks.Add()
objExcel.Cells(1, 1) = "Account"
objExcel.Cells(1, 2) = "Year"
objExcel.Cells(1, 3) = "LevyBalance"
objExcel.Cells(1, 4) = "Owner"
objExcel.Cells(1, 5) = "Address2"
objExcel.Cells(1, 6) = "Address3"
objExcel.Cells(1, 7) = "Address4"
objExcel.Cells(1, 8) = "City"
objExcel.Cells(1, 9) = "State"
objExcel.Cells(1, 10) = "ZipCode"
intRow = 2 'Set first row to add data

' Create file system object
Set oFSO = CreateObject("Scripting.FileSystemObject")

' Read entire input file into a variable and close it
Set oInfile = oFSO.OpenTextFile(sInfile, ForReading, False, TriStateUseDefault)

Set objDic = CreateObject("Scripting.Dictionary")
    
Do While Not oInfile.AtEndOfStream
    sLine = oInfile.ReadLine

    Account = Trim(Mid(sLine, 1, 22))
    TYear = Trim(Mid(sLine, 23, 4))
    Over65 = Trim(Mid(sLine, 65, 1))
    Veteran = Trim(Mid(sLine, 66, 1))
    Disabled = Trim(Mid(sLine, 67, 1))
    DatePaid = Trim(Mid(sLine, 69, 8))
    LevyBalance = Trim(Mid(sLine, 87, 11))
    BankRupt = Trim(Mid(sLine, 139, 1))
    Owner = Trim(Mid(sLine, 202, 40))
    Address2 = Trim(Mid(sLine, 242, 40))
    Address3 = Trim(Mid(sLine, 282, 40))
    Address4 = Trim(Mid(sLine, 322, 40))
    City = Trim(Mid(sLine, 362, 24))
    State = Trim(Mid(sLine, 386, 2))
    ZipCode = Mid(sLine, 388, 5) & "-" & Mid(sLine, 393, 4)
           
    If Not objDic.Exists(Account) Then
	objDic.Add Account, Account

        If (TYear <> "" And CLng(TYear) < 2010) And Over65 = "" And Veteran = "" And Disabled = "" And DatePaid = "" And ( LevyBalance <> "" And CLng(LevyBalance) >= 2500 ) And BankRupt = "" Then
            objExcel.Cells(intRow, 1) = Account
            objExcel.Cells(intRow, 2) = TYear
            objExcel.Cells(intRow, 3) = LevyBalance
            objExcel.Cells(intRow, 4) = Owner
            objExcel.Cells(intRow, 5) = Address2
            objExcel.Cells(intRow, 6) = Address3
            objExcel.Cells(intRow, 7) = Address4
            objExcel.Cells(intRow, 8) = City
            objExcel.Cells(intRow, 9) = State
            objExcel.Cells(intRow, 10) = ZipCode
            intRow = intRow + 1
        End If
    End If
Loop

'Format and Sort Excel workbook

Set objSheet = objExcel.ActiveWorkbook.ActiveSheet
objSheet.Columns("A:A").NumberFormat = "0"
Set objRange = objSheet.UsedRange
objRange.EntireColumn.AutoFit()
For iW = objRange.Column To objRange.Columns.Count
    colWidth = objSheet.Columns(iW).ColumnWidth
    objSheet.Columns(iW).ColumnWidth = colWidth + 5
Next
        
'Save Excel Workbook
objExcel.ActiveWorkbook.SaveAs(sOutfile)
'objExcel.ActiveWorkbook.Close xlSaveChanges
'objExcel.Quit
WScript.Echo "Script Finished"

' Cleanup and end
oInfile.Close
Set oInfile = Nothing
Set oFSO = Nothing

Open in new window

0
Free Tool: Subnet Calculator

The subnet calculator helps you design networks by taking an IP address and network mask and returning information such as network, broadcast address, and host range.

One of a set of tools we're offering as a way of saying thank you for being a part of the community.

 
sungenwangCommented:
' Define needed constants
Const ForReading = 1
Const ForWriting = 2
Const TriStateUseDefault = -2
Const xlSaveChanges = 1 'Excel Save Changes
Const xlAscending = 1 'Excel Sort Ascending
Const xlDescending = 2 'Excel Sort Decending
Const xlYes = 1 'Excel Header Row Exists

cTab = Chr(9)

' Get input and output file names from command line parms
If (WScript.Arguments.Count > 0) Then
    sInfile = WScript.Arguments(0)
Else
    WScript.Echo "No input filename specified."
    WScript.Quit
End If
If (WScript.Arguments.Count > 1) Then
    sOutfile = WScript.Arguments(1)
Else
    WScript.Echo "No output filename specified."
    WScript.Quit
End If

'Create Excel Object
Set objExcel = CreateObject("Excel.Application")
objExcel.DisplayAlerts = False 'Disable Overwrite Prompts
objExcel.Visible = True 'Hides Excel window

'Create Excel worksheet and add header row
objExcel.Workbooks.Add()
objExcel.Cells(1, 1) = "Account"
objExcel.Cells(1, 2) = "Year"
objExcel.Cells(1, 3) = "LevyBalance"
objExcel.Cells(1, 4) = "Owner"
objExcel.Cells(1, 5) = "Address2"
objExcel.Cells(1, 6) = "Address3"
objExcel.Cells(1, 7) = "Address4"
objExcel.Cells(1, 8) = "City"
objExcel.Cells(1, 9) = "State"
objExcel.Cells(1, 10) = "ZipCode"
intRow = 2 'Set first row to add data

' Create file system object
Set oFSO = CreateObject("Scripting.FileSystemObject")

' Read entire input file into a variable and close it
Set oInfile = oFSO.OpenTextFile(sInfile, ForReading, False, TriStateUseDefault)

Set objDic = CreateObject("Scripting.Dictionary")
   
Do While Not oInfile.AtEndOfStream
    sLine = oInfile.ReadLine

    Account = Trim(Mid(sLine, 1, 22))
    TYear = Trim(Mid(sLine, 23, 4))
    Over65 = Trim(Mid(sLine, 65, 1))
    Veteran = Trim(Mid(sLine, 66, 1))
    Disabled = Trim(Mid(sLine, 67, 1))
    DatePaid = Trim(Mid(sLine, 69, 8))
    LevyBalance = Trim(Mid(sLine, 87, 11))
    BankRupt = Trim(Mid(sLine, 139, 1))
    Owner = Trim(Mid(sLine, 202, 40))
    Address2 = Trim(Mid(sLine, 242, 40))
    Address3 = Trim(Mid(sLine, 282, 40))
    Address4 = Trim(Mid(sLine, 322, 40))
    City = Trim(Mid(sLine, 362, 24))
    State = Trim(Mid(sLine, 386, 2))
    ZipCode = Mid(sLine, 388, 5) & "-" & Mid(sLine, 393, 4)
           
    If Not objDic.Exists(Account) Then
      objDic.Add Account, Account

        If (TYear <> "" And CLng(TYear) < 2010) And Over65 = "" And Veteran = "" And Disabled = "" And DatePaid = "" And ( LevyBalance <> "" And CLng(LevyBalance) >= 2500 ) And BankRupt = "" Then
            objExcel.Cells(intRow, 1) = Account
            objExcel.Cells(intRow, 2) = TYear
            objExcel.Cells(intRow, 3) = LevyBalance
            objExcel.Cells(intRow, 4) = Owner
            objExcel.Cells(intRow, 5) = Address2
            objExcel.Cells(intRow, 6) = Address3
            objExcel.Cells(intRow, 7) = Address4
            objExcel.Cells(intRow, 8) = City
            objExcel.Cells(intRow, 9) = State
            objExcel.Cells(intRow, 10) = ZipCode
            intRow = intRow + 1
        End If
    End If
Loop

'Format and Sort Excel workbook

Set objSheet = objExcel.ActiveWorkbook.ActiveSheet
objSheet.Columns("A:A").NumberFormat = "0"
Set objRange = objSheet.UsedRange
objRange.EntireColumn.AutoFit()
For iW = objRange.Column To objRange.Columns.Count
    colWidth = objSheet.Columns(iW).ColumnWidth
    objSheet.Columns(iW).ColumnWidth = colWidth + 5
Next
       
'Save Excel Workbook
objExcel.ActiveWorkbook.SaveAs(sOutfile)
'objExcel.ActiveWorkbook.Close xlSaveChanges
'objExcel.Quit
WScript.Echo "Script Finished"

' Cleanup and end
oInfile.Close
Set oInfile = Nothing
Set oFSO = Nothing
0
 
jon1966Author Commented:
Hi,

Thank you.  If I take the original script, and process the file, the output is about 15,000 rows.  If I then sort by the "account" column, then use the duplicate removal feature within Excel 2010, to remove duplicates  based on the "account" field only, the result is 5672 rows remaining.

If I process the same file, with your code, the result is 2430 rows remaining.

Is there a private way to get you in touch with the full file?

J
0
 
sungenwangCommented:
you can email it to  (remove the spaces) :

s u n g e n w a n g  

< a t > 

h o t m a i l

{ d o t }

 c o m
0
 
jon1966Author Commented:
Hi, any luck?
0
 
sungenwangCommented:
This should do it... I had to set the dictionary key to a string type - CStr(Account) - due to the huge integer number.

sew

' Define needed constants
Const ForReading = 1
Const ForWriting = 2
Const TriStateUseDefault = -2
Const xlSaveChanges = 1 'Excel Save Changes
Const xlAscending = 1 'Excel Sort Ascending
Const xlDescending = 2 'Excel Sort Decending
Const xlYes = 1 'Excel Header Row Exists

cTab = Chr(9)

' Get input and output file names from command line parms
If (WScript.Arguments.Count > 0) Then
    sInfile = WScript.Arguments(0)
Else
    WScript.Echo "No input filename specified."
    WScript.Quit
End If
If (WScript.Arguments.Count > 1) Then
    sOutfile = WScript.Arguments(1)
Else
    WScript.Echo "No output filename specified."
    WScript.Quit
End If

'Create Excel Object
Set objExcel = CreateObject("Excel.Application")
objExcel.DisplayAlerts = False 'Disable Overwrite Prompts
objExcel.Visible = True 'Hides Excel window

'Create Excel worksheet and add header row
objExcel.Workbooks.Add()
objExcel.Cells(1, 1) = "Account"
objExcel.Cells(1, 2) = "Year"
objExcel.Cells(1, 3) = "LevyBalance"
objExcel.Cells(1, 4) = "Owner"
objExcel.Cells(1, 5) = "Address2"
objExcel.Cells(1, 6) = "Address3"
objExcel.Cells(1, 7) = "Address4"
objExcel.Cells(1, 8) = "City"
objExcel.Cells(1, 9) = "State"
objExcel.Cells(1, 10) = "ZipCode"
intRow = 2 'Set first row to add data

' Create file system object
Set oFSO = CreateObject("Scripting.FileSystemObject")

' Read entire input file into a variable and close it
Set oInfile = oFSO.OpenTextFile(sInfile, ForReading, False, TriStateUseDefault)

Set objDic = CreateObject("Scripting.Dictionary")
    
intcount = 0
Do While Not oInfile.AtEndOfStream
    sLine = oInfile.ReadLine

    Account = Trim(Mid(sLine, 1, 22))
    TYear = Trim(Mid(sLine, 23, 4))
    Over65 = Trim(Mid(sLine, 65, 1))
    Veteran = Trim(Mid(sLine, 66, 1))
    Disabled = Trim(Mid(sLine, 67, 1))
    DatePaid = Trim(Mid(sLine, 69, 8))
    LevyBalance = Trim(Mid(sLine, 87, 11))
    BankRupt = Trim(Mid(sLine, 139, 1))
    Owner = Trim(Mid(sLine, 202, 40))
    Address2 = Trim(Mid(sLine, 242, 40))
    Address3 = Trim(Mid(sLine, 282, 40))
    Address4 = Trim(Mid(sLine, 322, 40))
    City = Trim(Mid(sLine, 362, 24))
    State = Trim(Mid(sLine, 386, 2))
    ZipCode = Mid(sLine, 388, 5) & "-" & Mid(sLine, 393, 4)
           
    If (TYear <> "" And CLng(TYear) < 2010) And Over65 = "" And Veteran = "" And Disabled = "" And DatePaid = "" And ( LevyBalance <> "" And CLng(LevyBalance) >= 2500 ) And BankRupt = "" Then
        If Not objDic.Exists(CStr(Account)) Then

	    objDic.Add CStr(Account), Account

            objExcel.Cells(intRow, 1) = Account
            objExcel.Cells(intRow, 2) = TYear
            objExcel.Cells(intRow, 3) = LevyBalance
            objExcel.Cells(intRow, 4) = Owner
            objExcel.Cells(intRow, 5) = Address2
            objExcel.Cells(intRow, 6) = Address3
            objExcel.Cells(intRow, 7) = Address4
            objExcel.Cells(intRow, 8) = City
            objExcel.Cells(intRow, 9) = State
            objExcel.Cells(intRow, 10) = ZipCode

            intRow = intRow + 1
        End If
    End If
Loop

Set objDic = Nothing

'Format and Sort Excel workbook

Set objSheet = objExcel.ActiveWorkbook.ActiveSheet
objSheet.Columns("A:A").NumberFormat = "0"
Set objRange = objSheet.UsedRange
objRange.EntireColumn.AutoFit()
For iW = objRange.Column To objRange.Columns.Count
    colWidth = objSheet.Columns(iW).ColumnWidth
    objSheet.Columns(iW).ColumnWidth = colWidth + 5
Next
        
'Save Excel Workbook
objExcel.ActiveWorkbook.SaveAs(sOutfile)
'objExcel.ActiveWorkbook.Close xlSaveChanges
'objExcel.Quit
WScript.Echo "Script Finished"

' Cleanup and end
oInfile.Close
Set oInfile = Nothing
Set oFSO = Nothing

Open in new window

0
 
jon1966Author Commented:
Works great, thank you.
0
Question has a verified solution.

Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

Join & Write a Comment

Featured Post

Get expert help—faster!

Need expert help—fast? Use the Help Bell for personalized assistance getting answers to your important questions.

  • 5
  • 4
Tackle projects and never again get stuck behind a technical roadblock.
Join Now