Link to home
Create AccountLog in
Avatar of Sam OZ
Sam OZFlag for Australia

asked on

Match in excel for almost same entries

I have two excel sheets  TagDesign and TagActual    There are many entries which are similar but slightly different  (For example TG-1234 and TG1234)
Can we have a list in excel  showing the entries  with difference in just one character
(It can be MS Access or Sql Server or a small macro by which I can get this result)

  The typical result I look will be something like
   Tag design        TagActual
       TG-1234          TG1234
       P#3456            P3456
       PVT 12             PVT-12        ( Space and hyphen)
Avatar of Gustav Brock
Gustav Brock
Flag of Denmark image

You could use a simple expression using Replace to identify matches:

Replace(Replace(Replace([Tag design], "-", ""), "#", ""), " ", "") = Replace(Replace(Replace([Tag actual], "-", ""), "#", ""), " ", "")

Open in new window

ASKER CERTIFIED SOLUTION
Avatar of Rob Henson
Rob Henson
Flag of United Kingdom of Great Britain and Northern Ireland image

Link to home
membership
Create a free account to see this answer
Signing up is free and takes 30 seconds. No credit card required.
See answer
User generated imageI got this file from a PAQ, sorry did not record the Expert
Public Function Similarity(ByVal String1 As String, _
    ByVal String2 As String, _
    Optional ByRef RetMatch As String, _
    Optional min_match = 1) As Single
Dim b1() As Byte, b2() As Byte
Dim lngLen1 As Long, lngLen2 As Long
Dim lngResult As Long

If UCase(String1) = UCase(String2) Then
    Similarity = 1
Else:
    lngLen1 = Len(String1)
    lngLen2 = Len(String2)
    If (lngLen1 = 0) Or (lngLen2 = 0) Then
        Similarity = 0
    Else:
        b1() = StrConv(UCase(String1), vbFromUnicode)
        b2() = StrConv(UCase(String2), vbFromUnicode)
        lngResult = Similarity_sub(0, lngLen1 - 1, _
        0, lngLen2 - 1, _
        b1, b2, _
        String1, _
        RetMatch, _
        min_match)
        Erase b1
        Erase b2
        If lngLen1 >= lngLen2 Then
            Similarity = lngResult / lngLen1
        Else
            Similarity = lngResult / lngLen2
        End If
    End If
End If

End Function

Private Function Similarity_sub(ByVal start1 As Long, ByVal end1 As Long, _
                                ByVal start2 As Long, ByVal end2 As Long, _
                                ByRef b1() As Byte, ByRef b2() As Byte, _
                                ByVal FirstString As String, _
                                ByRef RetMatch As String, _
                                ByVal min_match As Long, _
                                Optional recur_level As Integer = 0) As Long
'* CALLED BY: Similarity *(RECURSIVE)

Dim lngCurr1 As Long, lngCurr2 As Long
Dim lngMatchAt1 As Long, lngMatchAt2 As Long
Dim I As Long
Dim lngLongestMatch As Long, lngLocalLongestMatch As Long
Dim strRetMatch1 As String, strRetMatch2 As String

If (start1 > end1) Or (start1 < 0) Or (end1 - start1 + 1 < min_match) _
Or (start2 > end2) Or (start2 < 0) Or (end2 - start2 + 1 < min_match) Then
    Exit Function '(exit if start/end is out of string, or length is too short)
End If

For lngCurr1 = start1 To end1
    For lngCurr2 = start2 To end2
        I = 0
        Do Until b1(lngCurr1 + I) <> b2(lngCurr2 + I)
            I = I + 1
            If I > lngLongestMatch Then
                lngMatchAt1 = lngCurr1
                lngMatchAt2 = lngCurr2
                lngLongestMatch = I
            End If
            If (lngCurr1 + I) > end1 Or (lngCurr2 + I) > end2 Then Exit Do
        Loop
    Next lngCurr2
Next lngCurr1

If lngLongestMatch < min_match Then Exit Function

lngLocalLongestMatch = lngLongestMatch
RetMatch = ""

lngLongestMatch = lngLongestMatch _
+ Similarity_sub(start1, lngMatchAt1 - 1, _
start2, lngMatchAt2 - 1, _
b1, b2, _
FirstString, _
strRetMatch1, _
min_match, _
recur_level + 1)
If strRetMatch1 <> "" Then
    RetMatch = RetMatch & strRetMatch1 & "*"
Else
    RetMatch = RetMatch & IIf(recur_level = 0 _
    And lngLocalLongestMatch > 0 _
    And (lngMatchAt1 > 1 Or lngMatchAt2 > 1) _
    , "*", "")
End If


RetMatch = RetMatch & Mid$(FirstString, lngMatchAt1 + 1, lngLocalLongestMatch)


lngLongestMatch = lngLongestMatch _
+ Similarity_sub(lngMatchAt1 + lngLocalLongestMatch, end1, _
lngMatchAt2 + lngLocalLongestMatch, end2, _
b1, b2, _
FirstString, _
strRetMatch2, _
min_match, _
recur_level + 1)

If strRetMatch2 <> "" Then
    RetMatch = RetMatch & "*" & strRetMatch2
Else
    RetMatch = RetMatch & IIf(recur_level = 0 _
    And lngLocalLongestMatch > 0 _
    And ((lngMatchAt1 + lngLocalLongestMatch < end1) _
    Or (lngMatchAt2 + lngLocalLongestMatch < end2)) _
    , "*", "")
End If

Similarity_sub = lngLongestMatch

End Function


'Actual function that compares two strings.
Private Function PercentTheSame(ByVal Text As String, ByVal CompareWith As String, Optional ByVal CaseSensitive As Boolean = False) As Single
    Dim lonLenText As Long, lonLenCompare As Long
    Dim lonLoop As Long, lonDiff As Long
    Dim strCur As String, strC As String
    
    lonLenText = Len(Text)
    lonLenCompare = Len(CompareWith)
    
    For lonLoop = 1 To lonLenText
        
        If lonLoop > lonLenCompare Then
            lonDiff = lonDiff + 1
        Else
            
            If CaseSensitive = False Then
                strCur = LCase$(Mid$(Text, lonLoop, 1))
                strC = LCase$(Mid$(CompareWith, lonLoop, 1))
            Else
                strCur = Mid$(Text, lonLoop, 1)
                strC = Mid$(CompareWith, lonLoop, 1)
            End If
            
            If Not strCur = strC Then
                lonDiff = lonDiff + 1
            End If
        
        End If
    
    Next lonLoop
    
    PercentTheSame = CSng(((lonLenText - lonDiff) / lonLenText) * 100)
End Function
 
'Finds which string is longer to use in the comparison function.
Public Function StartCompare(ByVal Text As String, ByVal CompareWith As String, Optional ByVal CaseSensitive As Boolean = False) As Single
    
    'Quick test so we don't waste time.
    If Text = CompareWith Then
        StartCompare = 100
        Exit Function
    End If
    
    If Len(Text) > Len(CompareWith) Then
        StartCompare = PercentTheSame(Text, CompareWith, CaseSensitive)
    Else
        StartCompare = PercentTheSame(CompareWith, Text, CaseSensitive)
    End If
    
End Function

Open in new window