Sign up to receive Decoded, a new monthly digest with product updates, feature release info, continuing education opportunities, and more.
Option Explicit
Sub FindIdentical()
Dim lngLastRow As Long
Dim lngRow1 As Long
Dim lngRow2 As Long
Dim strWords1() As String
Dim strWords2() As String
Dim intPos As Integer
Dim intMinMatch As Integer
Dim intMatching As Integer
Dim lngIndex1 As Long
Dim lngIndex2 As Long
Dim colWords1 As Collection
Dim colWords2 As Collection
Range("B1").EntireColumn.ClearContents
intMinMatch = InputBox("Please enter the number of words that have to match in order to make an URL identical", "How many words?", 3)
lngLastRow = Range("A65536").End(xlUp).Row
For lngRow1 = 1 To lngLastRow
' Break up the words in the last part of the URL into a collection
' of unique words
intPos = InStrRev(Cells(lngRow1, 1).Value, "\")
strWords1 = Split(ConvertSeparators(Mid(Cells(lngRow1, 1).Value, intPos + 1)), "-")
Set colWords1 = New Collection
On Error Resume Next
For lngIndex = 0 To UBound(strWords1)
colWords1.Add strWords1(lngIndex), strWords1(lngIndex)
If Err.Number = 457 Then
' The word is already in the collection
Err.Clear
End If
Next
On Error GoTo 0
' Compare the value in lngRow1 with all the rows below it
For lngRow2 = lngRow1 + 1 To lngLastRow
intMatching = 0
intPos = InStrRev(Cells(lngRow2, 1).Value, "\")
strWords2 = Split(ConvertSeparators(Mid(Cells(lngRow2, 1).Value, intPos + 1)), "-")
Set colWords2 = New Collection
On Error Resume Next
For lngIndex = 0 To UBound(strWords2)
colWords2.Add strWords2(lngIndex), strWords2(lngIndex)
If Err.Number = 457 Then
Err.Clear
End If
Next
' Compare the words. (Colletions start at index 1)
For lngIndex1 = 1 To colWords1.Count
For lngIndex2 = 1 To colWords2.Count
If lngIndex1 <= colWords2.Count Then
If colWords1(lngIndex1) = colWords2(lngIndex2) Then
intMatching = intMatching + 1
Exit For
End If
End If
Next
Next
If intMatching >= intMinMatch Then
If Cells(lngRow1, 2).Value = "" Then
Cells(lngRow1, 2).Value = "This URL has " & intMatching & " words that match words in " & Cells(lngRow2, 1).Value
Else
Cells(lngRow1, 2).Value = Cells(lngRow1, 2).Value & vbCrLf & " AND " & intMatching & " words that match words in " & Cells(lngRow2, 1).Value
End If
End If
Next
Next
End Sub
Private Function ConvertSeparators(strWords As String) As String
' This function changes all listed separators to dashes
Dim varSeps As Variant
Dim lngIndex As Long
' Add any characters in additon to the dash that can be separators. I added
' the # and [ as examples of how you'd add them
varSeps = Array("#", "[")
For lngIndex = 0 To UBound(varSeps)
strWords = Replace(strWords, varSeps(lngIndex), "-")
Next
ConvertSeparators = strWords
End Function
Q-28274528.xlsm
Column "B" can we have numbers if 2 are duplicate to pair those 2 lets place number (1)Should I use 3 for the next duplicate, and then 4 and then 5, etc?
Then if next one has a duplicate with 4 URL's then lets place (2) 4 times
Sub FindIdentical()
Dim lngLastRow As Long
Dim lngRow1 As Long
Dim lngRow2 As Long
Dim strWords1() As String
Dim strWords2() As String
Dim intPos As Integer
Dim intMinMatch As Integer
Dim intMatching As Integer
Dim lngIndex1 As Long
Dim lngIndex2 As Long
Dim colWords1 As Collection
Dim colWords2 As Collection
Dim varData As Variant
Dim lngMarker As Long
Range("B1").EntireColumn.ClearContents
intMinMatch = InputBox("Please enter the number of words that have to match in order to make an URL identical", "How many words?", 3)
lngLastRow = Range("A65536").End(xlUp).Row
varData = Range("A1:B" & lngLastRow).Value
For lngRow1 = 1 To lngLastRow
' Break up the words in the last part of the URL into a collection
' of unique words
intPos = InStrRev(varData(lngRow1, 1), "/")
strWords1 = Split(ConvertSeparators(Mid(varData(lngRow1, 1), intPos + 1)), "-")
Set colWords1 = New Collection
On Error Resume Next
For lngIndex1 = 0 To UBound(strWords1)
Debug.Print UBound(strWords1)
colWords1.Add strWords1(lngIndex1), strWords1(lngIndex1)
If Err.Number = 457 Then
' The word is already in the collection
Err.Clear
End If
Next
On Error GoTo 0
' Compare the value in lngRow1 with all the rows below it
For lngRow2 = lngRow1 + 1 To lngLastRow
intMatching = 0
intPos = InStrRev(varData(lngRow2, 1), "/")
strWords2 = Split(ConvertSeparators(Mid(varData(lngRow2, 1), intPos + 1)), "-")
Set colWords2 = New Collection
On Error Resume Next
For lngIndex2 = 0 To UBound(strWords2)
colWords2.Add strWords2(lngIndex2), strWords2(lngIndex2)
If Err.Number = 457 Then
Err.Clear
End If
Next
' Compare the words. (Colletions start at index 1)
For lngIndex1 = 1 To colWords1.Count
For lngIndex2 = 1 To colWords2.Count
If lngIndex1 <= colWords2.Count Then
If colWords1(lngIndex1) = colWords2(lngIndex2) Then
intMatching = intMatching + 1
Exit For
End If
End If
Next
Next
If intMatching >= intMinMatch Then
If varData(lngRow1, 2) = "" Then
varData(lngRow1, 2) = "This URL has " & intMatching & " words that match words in " & varData(lngRow2, 1)
Else
varData(lngRow1, 2) = varData(lngRow1, 2) & vbCrLf & " AND " & intMatching & " words that match words in " & varData(lngRow2, 1)
End If
End If
Next
Next
Range("A1:B" & lngLastRow).Value = varData
End Sub
Sub FindIdentical()
Dim lngLastRow As Long
Dim lngRow1 As Long
Dim lngRow2 As Long
Dim strWords1() As String
Dim intPos As Integer
Dim intMinMatch As Integer
Dim intMatching As Integer
Dim lngIndex1 As Long
Dim lngIndex2 As Long
Dim colWords1 As Collection
Dim colWords2 As Collection
Dim varData As Variant
Dim lngMarker As Long
intMinMatch = InputBox("Please enter the number of words that have to match in order to make an URL identical", "How many words?", 3)
lngLastRow = Range("A65536").End(xlUp).Row
' Transfer all the data to an array. The data is only
' in columns A and B, but Q is used to leave room
' for 15 additional columns for the words
varData = Range("A1:Q" & lngLastRow).Value
Debug.Print Now
For lngRow1 = 1 To lngLastRow
' Split up all the words
intPos = InStrRev(varData(lngRow1, 1), "/")
strWords1 = Split(ConvertSeparators(Mid(varData(lngRow1, 1), intPos + 1)), "-")
Set colWords1 = New Collection
On Error Resume Next
For lngIndex1 = 0 To UBound(strWords1)
colWords1.Add strWords1(lngIndex1), strWords1(lngIndex1)
If Err.Number = 457 Then
' The word is already in the collection
Err.Clear
End If
Next
intPos = 3
For lngIndex1 = 1 To colWords1.Count
varData(lngRow1, intPos) = colWords1(lngIndex1)
intPos = intPos + 1
Next
On Error GoTo 0
Next
For lngRow1 = 1 To lngLastRow
Debug.Print lngRow1 & " - " & Now
' Compare the words in lngRow1 with the words in all the rows below it
For lngRow2 = lngRow1 + 1 To lngLastRow
intMatching = 0
For lngIndex1 = 3 To 17
If varData(lngRow1, lngIndex1) = "" Then
Exit For
Else
If varData(lngRow1, lngIndex1) = varData(lngRow2, 3) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 4) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 5) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 6) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 7) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 8) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 9) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 10) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 11) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 12) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 13) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 14) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 15) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 16) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 17) Then
intMatching = intMatching + 1
End If
End If
Next
If intMatching >= intMinMatch Then
If varData(lngRow1, 2) = "" Then
varData(lngRow1, 2) = lngRow1 & ", " & lngRow2
Else
varData(lngRow1, 2) = varData(lngRow1, 2) & ", " & lngRow2
End If
End If
Next
Next
Range("A1:B" & lngLastRow).Value = varData
Debug.Print Now
End Sub
Sub FindIdentical()
Dim lngLastRow As Long
Dim lngRow1 As Long
Dim lngRow2 As Long
Dim strWords1() As String
Dim intPos As Integer
Dim intMinMatch As Integer
Dim intMatching As Integer
Dim lngIndex1 As Long
Dim lngIndex2 As Long
Dim colWords1 As Collection
Dim colWords2 As Collection
Dim varData As Variant
Dim lngLastMatch As Long
intMinMatch = InputBox("Please enter the number of words that have to match in order to make an URL identical", "How many words?", 3)
lngLastRow = Range("A65536").End(xlUp).Row
Columns("B:B").ClearContents
' Transfer all the data to an array. The data is only
' in columns A and B, but Q is used to leave room
' for 15 additional columns for the words
varData = Range("A1:Q" & lngLastRow).Value
'Debug.Print Now
For lngRow1 = 1 To lngLastRow
' Split up all the words
intPos = InStrRev(varData(lngRow1, 1), "/")
strWords1 = Split(ConvertSeparators(Mid(varData(lngRow1, 1), intPos + 1)), "-")
Set colWords1 = New Collection
On Error Resume Next
For lngIndex1 = 0 To UBound(strWords1)
colWords1.Add strWords1(lngIndex1), strWords1(lngIndex1)
If Err.Number = 457 Then
' The word is already in the collection
Err.Clear
End If
Next
intPos = 3
For lngIndex1 = 1 To colWords1.Count
varData(lngRow1, intPos) = colWords1(lngIndex1)
intPos = intPos + 1
Next
On Error GoTo 0
Next
For lngRow1 = 1 To lngLastRow
'Debug.Print lngRow1 & " - " & Now
' Compare the words in lngRow1 with the words in all the rows below it
For lngRow2 = lngRow1 + 1 To lngLastRow
intMatching = 0
For lngIndex1 = 3 To 17
If varData(lngRow1, lngIndex1) = "" Then
Exit For
Else
If varData(lngRow1, lngIndex1) = varData(lngRow2, 3) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 4) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 5) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 6) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 7) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 8) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 9) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 10) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 11) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 12) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 13) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 14) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 15) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 16) Or _
varData(lngRow1, lngIndex1) = varData(lngRow2, 17) Then
intMatching = intMatching + 1
End If
End If
Next
If intMatching >= intMinMatch Then
lngLastMatch = lngRow2
If varData(lngRow1, 2) = "" Then
varData(lngRow1, 2) = lngRow1 & ", " & lngRow2
Else
varData(lngRow1, 2) = varData(lngRow1, 2) & ", " & lngRow2
End If
End If
Next
varData(lngLastMatch, 2) = "'" & lngLastMatch
Next
Range("A1:B" & lngLastRow).Value = varData
'Debug.Print Now
End Sub
Sub FindIdentical()
Dim lngLastRow As Long
Dim lngRow1 As Long
Dim lngRow2 As Long
Dim strWords1() As String
Dim intPos As Integer
Dim intMinMatch As Integer
Dim intMatching As Integer
Dim lngIndex1 As Long
Dim lngIndex2 As Long
Dim colWords1 As Collection
Dim colWords2 As Collection
Dim varData As Variant
Dim lngMarker As Long
intMinMatch = InputBox("Please enter the number of words that have to match in order to make an URL identical", "How many words?", 3)
lngLastRow = Range("A65536").End(xlUp).Row
Columns("B:B").ClearContents
' Transfer all the data to an array. The data is only
' in columns A and B, but Q is used to leave room
' for 15 additional columns for the words
varData = Range("A1:Q" & lngLastRow).Value
'Debug.Print Now
For lngRow1 = 1 To lngLastRow
' Split up all the words
intPos = InStrRev(varData(lngRow1, 1), "/")
strWords1 = Split(ConvertSeparators(Mid(varData(lngRow1, 1), intPos + 1)), "-")
Set colWords1 = New Collection
On Error Resume Next
For lngIndex1 = 0 To UBound(strWords1)
colWords1.Add strWords1(lngIndex1), strWords1(lngIndex1)
If Err.Number = 457 Then
' The word is already in the collection
Err.Clear
End If
Next
intPos = 3
For lngIndex1 = 1 To colWords1.Count
varData(lngRow1, intPos) = colWords1(lngIndex1)
intPos = intPos + 1
Next
On Error GoTo 0
Next
For lngRow1 = 1 To lngLastRow
'If lngRow1 Mod 10 = 0 Then Debug.Print lngRow1 & " - " & Now
' Compare the words in lngRow1 with the words in all the rows below it
For lngRow2 = lngRow1 + 1 To lngLastRow
intMatching = 0
For lngIndex1 = 3 To 17
If varData(lngRow1, lngIndex1) = "" Then
Exit For
Else
Select Case True
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 3)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 4)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 5)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 6)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 7)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 8)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 9)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 10)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 11)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 12)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 13)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 14)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 15)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 16)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 17)
intMatching = intMatching + 1
End Select
End If
Next
If intMatching >= intMinMatch Then
If varData(lngRow1, 2) = "" Then
varData(lngRow1, 2) = lngRow1 & ", " & lngRow2
Else
varData(lngRow1, 2) = varData(lngRow1, 2) & ", " & lngRow2
End If
End If
Next
Next
Range("A1:B" & lngLastRow).Value = varData
'Debug.Print Now
End Sub
Sub FindIdentical()
Dim lngLastRow As Long
Dim lngRow1 As Long
Dim lngRow2 As Long
Dim strWords1() As String
Dim intPos As Integer
Dim intMinMatch As Integer
Dim intMatching As Integer
Dim lngIndex1 As Long
Dim lngIndex2 As Long
Dim colWords1 As Collection
Dim colWords2 As Collection
Dim varData As Variant
Dim lngMarker As Long
Dim lngLastMatch As Long
intMinMatch = InputBox("Please enter the number of words that have to match in order to make an URL identical", "How many words?", 3)
lngLastRow = Range("A65536").End(xlUp).Row
Columns("B:B").ClearContents
' Transfer all the data to an array. The data is only
' in columns A and B, but Q is used to leave room
' for 15 additional columns for the words
varData = Range("A1:Q" & lngLastRow).Value
'Debug.Print Now
For lngRow1 = 1 To lngLastRow
' Split up all the words
intPos = InStrRev(varData(lngRow1, 1), "/")
strWords1 = Split(ConvertSeparators(Mid(varData(lngRow1, 1), intPos + 1)), "-")
Set colWords1 = New Collection
On Error Resume Next
For lngIndex1 = 0 To UBound(strWords1)
colWords1.Add strWords1(lngIndex1), strWords1(lngIndex1)
If Err.Number = 457 Then
' The word is already in the collection
Err.Clear
End If
Next
intPos = 3
For lngIndex1 = 1 To colWords1.Count
varData(lngRow1, intPos) = colWords1(lngIndex1)
intPos = intPos + 1
Next
On Error GoTo 0
Next
For lngRow1 = 1 To lngLastRow
'If lngRow1 Mod 10 = 0 Then Debug.Print lngRow1 & " - " & Now
' Compare the words in lngRow1 with the words in all the rows below it
For lngRow2 = lngRow1 + 1 To lngLastRow
intMatching = 0
For lngIndex1 = 3 To 17
If varData(lngRow1, lngIndex1) = "" Then
Exit For
Else
Select Case True
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 3)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 4)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 5)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 6)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 7)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 8)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 9)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 10)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 11)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 12)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 13)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 14)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 15)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 16)
intMatching = intMatching + 1
Case varData(lngRow1, lngIndex1) = varData(lngRow2, 17)
intMatching = intMatching + 1
End Select
End If
Next
If intMatching >= intMinMatch Then
lngLastMatch = lngRow2
If varData(lngRow1, 2) = "" Then
varData(lngRow1, 2) = lngRow1 & ", " & lngRow2
Else
varData(lngRow1, 2) = varData(lngRow1, 2) & ", " & lngRow2
End If
End If
Next
varData(lngLastMatch, 2) = "'" & lngLastMatch
Next
Range("A1:B" & lngLastRow).Value = varData
'Debug.Print Now
End Sub
Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.
Have a better answer? Share it in a comment.
2) Do the words that may make it identical only come from the portion of the url after the last backslash?
3) In your example, all the identical urls follow each other. Is that a requirement? In other words assuming that the data was as follows, would 1 and 3 still be identical?
www.site.com\somename\this-is-the-page
http://www.experts-exchange.com/Software/Office_Productivity
www.site.com\somename\this-is-the-page-thats-is-identical
4) Do you want to know how many identical words there are?
5) How do want to identify the identical urls?