xenium
asked on
Function to return % match between 2 strings
Here's a tricky one.
I'd like a function to compare 2 input strings and return % of chars that match. Position ignored but order considered.
eg.
Msgbox Similarity("Examples","An example")
would return 7/10 = 70% (10 chars in total, 7 of which match in order.)
Msgbox Similarity("a difficult one","tricky one")
would return 6/15 = 40% (6 letters match in order "IC" then " ONE" ) ( "T" then " ONE" also matches but this scores less so would be dropped)
Thanks!!!!
Slightly related:
https://www.experts-exchange.com/questions/20325588/Comparing-Text-Strings-In-Access.html?query=compare+text&topics=39
I'd like a function to compare 2 input strings and return % of chars that match. Position ignored but order considered.
eg.
Msgbox Similarity("Examples","An example")
would return 7/10 = 70% (10 chars in total, 7 of which match in order.)
Msgbox Similarity("a difficult one","tricky one")
would return 6/15 = 40% (6 letters match in order "IC" then " ONE" ) ( "T" then " ONE" also matches but this scores less so would be dropped)
Thanks!!!!
Slightly related:
https://www.experts-exchange.com/questions/20325588/Comparing-Text-Strings-In-Access.html?query=compare+text&topics=39
ASKER
Sub xxExample()
MsgBox Similarity("Examples", "An example")
End Sub
Function Similarity(str1, str2)
'int DamerauLevenshteinDistance (char str1[1..lenStr1], char str2[1..lenStr2])
' // d is a table with lenStr1+1 rows and lenStr2+1 columns
lenstr1 = Len(str1)
lenstr2 = Len(str2)
' declare int d[0..lenStr1, 0..lenStr2]
Dim d
ReDim d(lenstr1 + 1, lenstr2 + 1) As Integer
' // i and j are used to iterate over str1 and str2
' declare int i, j, cost
Dim i, j, cost As Integer
'
' for i from 0 to lenStr1
For i = 0 To lenstr1
' d i, 0:=i
d(i, 0) = i
Next i
' for j from 0 to lenStr2
For j = 0 To lenstr2
' d 0, j:=j
d(0, j) = j
Next j
'
' for i from 1 to lenStr1
For i = 1 To lenstr1
' for j from 1 to lenStr2
For j = 1 To lenstr2
' if str1[i] = str2[j] then cost := 0
If Mid(str1, i, 1) = Mid(str2, j, 1) Then
cost = 0
' else cost := 1
Else
cost = 1
End If
' d[i, j] := minimum(
' d[i-1, j ] + 1, // deletion
' d[i , j-1] + 1, // insertion
' d[i-1, j-1] + cost // substitution
' )
d(i, j) = Minimum(d(i - 1, j) + 1, d(i, j - 1) + 1, d(i - 1, j - 1) + cost)
' if(i > 1 and j > 1 and str1[i] = str2[j-1] and str1[i-1] = str2[j]) then
' d[i, j] := minimum(
' d[i, j],
' d[i-2, j-2] + cost // transposition
' )
If i > 1 And j > 1 Then
If Mid(str1, i, 1) = Mid(str2, j - 1, 1) And Mid(str1, i - 1, 1) = Mid(str2, j, 1) Then
d(i, j) = Minimum(d(i, j), d(i - 2, j - 2) + cost)
End If
End If
Next j
Next i
'
'
' return d[lenStr1, lenStr2]
Similiarity = d(lenstr1, lenstr2)
End Function
' Return the smallest parameter value.
Function Minimum(ParamArray values() As Variant) As _
Variant
Dim i As Integer
Dim min_value As Variant
min_value = values(LBound(values))
For i = LBound(values) + 1 To UBound(values)
If min_value > values(i) Then min_value = values(i)
Next i
Minimum = min_value
End Function
MsgBox Similarity("Examples", "An example")
End Sub
Function Similarity(str1, str2)
'int DamerauLevenshteinDistance
' // d is a table with lenStr1+1 rows and lenStr2+1 columns
lenstr1 = Len(str1)
lenstr2 = Len(str2)
' declare int d[0..lenStr1, 0..lenStr2]
Dim d
ReDim d(lenstr1 + 1, lenstr2 + 1) As Integer
' // i and j are used to iterate over str1 and str2
' declare int i, j, cost
Dim i, j, cost As Integer
'
' for i from 0 to lenStr1
For i = 0 To lenstr1
' d i, 0:=i
d(i, 0) = i
Next i
' for j from 0 to lenStr2
For j = 0 To lenstr2
' d 0, j:=j
d(0, j) = j
Next j
'
' for i from 1 to lenStr1
For i = 1 To lenstr1
' for j from 1 to lenStr2
For j = 1 To lenstr2
' if str1[i] = str2[j] then cost := 0
If Mid(str1, i, 1) = Mid(str2, j, 1) Then
cost = 0
' else cost := 1
Else
cost = 1
End If
' d[i, j] := minimum(
' d[i-1, j ] + 1, // deletion
' d[i , j-1] + 1, // insertion
' d[i-1, j-1] + cost // substitution
' )
d(i, j) = Minimum(d(i - 1, j) + 1, d(i, j - 1) + 1, d(i - 1, j - 1) + cost)
' if(i > 1 and j > 1 and str1[i] = str2[j-1] and str1[i-1] = str2[j]) then
' d[i, j] := minimum(
' d[i, j],
' d[i-2, j-2] + cost // transposition
' )
If i > 1 And j > 1 Then
If Mid(str1, i, 1) = Mid(str2, j - 1, 1) And Mid(str1, i - 1, 1) = Mid(str2, j, 1) Then
d(i, j) = Minimum(d(i, j), d(i - 2, j - 2) + cost)
End If
End If
Next j
Next i
'
'
' return d[lenStr1, lenStr2]
Similiarity = d(lenstr1, lenstr2)
End Function
' Return the smallest parameter value.
Function Minimum(ParamArray values() As Variant) As _
Variant
Dim i As Integer
Dim min_value As Variant
min_value = values(LBound(values))
For i = LBound(values) + 1 To UBound(values)
If min_value > values(i) Then min_value = values(i)
Next i
Minimum = min_value
End Function
ASKER
This above function is returning no result - any ideas? The commented part is from the pseudo-code here:
http://en.wikipedia.org/wiki/Levenshtein_distance
http://en.wikipedia.org/wiki/Levenshtein_distance
ASKER
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
Ironic!
ASKER
Follow-up to improve processing performance:
https://www.experts-exchange.com/questions/21891684/How-to-calculate-match-between-two-text-strings-part-II.html
https://www.experts-exchange.com/questions/21891684/How-to-calculate-match-between-two-text-strings-part-II.html
I hope you didn't mean that my answer was ironic... If so, it wasn't meant that way.
And I don't think I can help you improve the algorythm. This is way above my head!
(°v°)
And I don't think I can help you improve the algorythm. This is way above my head!
(°v°)
ASKER
https://www.experts-exchange.com/questions/21868284/How-to-calculate-match-between-two-text-strings.html