• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 315
  • Last Modified:

Remove rows containing duplicate values in Excel using VB.NET

Hey guys,

I have been looking around for a while now to try and find this answer and I keep coming up short. I am creating a small application that will sort an excel document from the "A" column and then delete the duplicates in the column. The test data is as follows:

      A                B                     C
1 Reference      Page      Document
2 test 1      Page 1      test1.docx
3 test 2      Page 2      test1.docx
4 test 3      Page 1      test1.docx
5 test 3      Page 2      test1.docx

I have the sorting down pat, I just need to remove the duplicates. I hope someone can help me out.

Cheers
G_M

   
Public Sub SortDocumentInExcel()
        Dim strWorkingFile As String
        Dim objExcel As Excel.Application
        Dim objWorkbook As Excel.Workbook
        Dim objWorkSheet As Excel.Worksheet
        Dim intRowValue As Integer = 1

        strWorkingFile = frmGetReferences.lblOutputFolder.Text & "wf_" & frmGetReferences.txtOutputFilename.Text & ".xlsx"
        objExcel = CreateObject("Excel.Application")
        objWorkbook = objExcel.Workbooks.Open(strWorkingFile)
        objWorkSheet = objWorkbook.Sheets("sheet1")
        objExcel.Visible = True

        ' Sort the data in the "A" column; expanding selection to other columns
        objWorkSheet.Cells.Sort(Key1:=objWorkSheet.Range("A2"), _
                                Order1:=Excel.XlSortOrder.xlAscending, Header:=Excel.XlYesNoGuess.xlYes, _
                                Orientation:=Excel.XlSortOrientation.xlSortColumns, SortMethod:=Excel.XlSortMethod.xlPinYin)

        'Attempt at removing duplicate values... Does not work
        While objWorkSheet.Cells.Value("A" & intRowValue) <> ""

            If objWorkSheet.Cells.Value("A" & intRowValue) = objWorkSheet.Cells.Value("A" & intRowValue + 1) Then
                objWorkSheet.Range("A" & intRowValue).entirerow.delete()
            Else
                intRowValue = intRowValue + 1
            End If

        End While



        objWorkbook.Save()
        'objWorkbook.Close()
        'objExcel.Quit()

        objExcel = Nothing
        objWorkbook = Nothing
        objWorkSheet = Nothing

    End Sub

Open in new window

0
G_M
Asked:
G_M
  • 3
  • 2
1 Solution
 
NorieCommented:
Just column A?
0
 
G_MAuthor Commented:
Yes. I only want to remove the row if the duplicate appears in column A
0
 
NorieCommented:
Try this.
Dim objXLRange As Excel.Range

LastRow = objWorksheet.Range("A" & objWorksheet.Rows.Count).End(Excel.XlDirection.xlUp).Row
		
objXLRange = objWorksheet.Range("A1").Resize(LastRow,3)
				
objXLRange.RemoveDuplicates(Columns:=1, Header:=Excel.XlYesNoGuess.xlYes)

Open in new window

0
 
G_MAuthor Commented:
I just had to declare the LastRow as Long, but that worked great. Thank you.

The final code is as follows.
Cheers
G_M

 
Public Sub SortDocumentInExcel()
        Dim strWorkingFile As String
        Dim objExcel As Excel.Application
        Dim objWorkbook As Excel.Workbook
        Dim objWorkSheet As Excel.Worksheet
        Dim objExcelRange As Excel.Range
        Dim LastRow As Long

        strWorkingFile = frmGetReferences.lblOutputFolder.Text & "wf_" & frmGetReferences.txtOutputFilename.Text & ".xlsx"
        objExcel = CreateObject("Excel.Application")
        objWorkbook = objExcel.Workbooks.Open(strWorkingFile)
        objWorkSheet = objWorkbook.Sheets("sheet1")

        ' Sort the data in the "A" column; expanding selection to other columns
        objWorkSheet.Cells.Sort(Key1:=objWorkSheet.Range("A2"), _
                                Order1:=Excel.XlSortOrder.xlAscending, Header:=Excel.XlYesNoGuess.xlYes, _
                                Orientation:=Excel.XlSortOrientation.xlSortColumns, SortMethod:=Excel.XlSortMethod.xlPinYin)

        'Remove rows based on duplicate values from "A" column
        LastRow = objWorkSheet.Range("A" & objWorkSheet.Rows.Count).End(Excel.XlDirection.xlUp).Row
        objExcelRange = objWorkSheet.Range("A1").Resize(LastRow, 3)
        objExcelRange.RemoveDuplicates(Columns:=1, Header:=Excel.XlYesNoGuess.xlYes)



        objWorkbook.Save()
        objWorkbook.Close()
        objExcel.Quit()

        objExcel = Nothing
        objWorkbook = Nothing
        objWorkSheet = Nothing


    End Sub

Open in new window

0
 
G_MAuthor Commented:
Dim LastRow As Long and this answer is spot on... Thank you
0

Featured Post

Important Lessons on Recovering from Petya

In their most recent webinar, Skyport Systems explores ways to isolate and protect critical databases to keep the core of your company safe from harm.

  • 3
  • 2
Tackle projects and never again get stuck behind a technical roadblock.
Join Now