A family of Microsoft spreadsheet software with tools for analyzing, charting, and communicating data.
As this is the programming (VBA) forum, I'll offer a VBA solution to your post (note that I haven't used this recently, and you may have to tweak it for your own use). Credit for the code is attributed to the really smart people on this forum (with my apologies that I don't always do a good job of documenting where I got code when I drop it in my own personal.xlsb)
Paste the code into a module, then use the UDF by placing the formula on a worksheet. It will give you the top three matches including the numeric matching score for each.
'This version allows the use of ranges instead of strings, and returns the
'top three matches with their numeric match values. Perfect match is zero.
'match number is the number if character changes required to one string to
'make it match the comparison string.
'UDF
Function CustomFuzzy(rng1 As Range, rng2 As Range) As Variant '(rng1= single value, rng2=comparisons range)
Dim sRng As Range
Dim fArr(1 To 6)
tcount = rng2.Cells.Count
ReDim tArr(1 To tcount, 1 To 2)
i = 0
For Each sRng In rng2 ' = 1 To tcount 'Each tRng In rng2
i = i + 1
tArr(i, 2) = sRng.Value
tArr(i, 1) = Levenshtein(rng1.Value, sRng.Value)
Next
If tcount > 1 Then tArr = BubbleSrt(tArr, True)
'returns the top three matches in a HORIZONTAL array
For p = 1 To 3
fArr((2 \* p) - 1) = tArr(p, 1) 'numeric fuzzy score
fArr((2 \* p)) = tArr(p, 2) 'text
Next p
CustomFuzzy = fArr
End Function
'Option Base 0 assumed
'POB: fn with byte array is 17 times faster
Function Levenshtein(ByVal string1 As String, ByVal string2 As String) As Long
Dim i As Long, j As Long, bs1() As Byte, bs2() As Byte
Dim string1_length As Long
Dim string2_length As Long
Dim distance() As Long
Dim min1 As Long, min2 As Long, min3 As Long
string1_length = Len(string1)
string2_length = Len(string2)
ReDim distance(string1_length, string2_length)
bs1 = string1
bs2 = string2
For i = 0 To string1_length
distance(i, 0) = i
Next
For j = 0 To string2_length
distance(0, j) = j
Next
For i = 1 To string1_length
For j = 1 To string2\_length
'slow way: If Mid$(string1, i, 1) = Mid$(string2, j, 1) Then
If bs1((i - 1) \* 2) = bs2((j - 1) \* 2) Then ' \*2 because Unicode every 2nd byte is 0
distance(i, j) = distance(i - 1, j - 1)
Else
'distance(i, j) = Application.WorksheetFunction.Min \_
(distance(i - 1, j) + 1, \_
distance(i, j - 1) + 1, \_
distance(i - 1, j - 1) + 1)
' spell it out, 50 times faster than worksheetfunction.min
min1 = distance(i - 1, j) + 1
min2 = distance(i, j - 1) + 1
min3 = distance(i - 1, j - 1) + 1
If min1 <= min2 And min1 <= min3 Then
distance(i, j) = min1
ElseIf min2 <= min1 And min2 <= min3 Then
distance(i, j) = min2
Else
distance(i, j) = min3
End If
End If
Next
Next
Levenshtein = distance(string1_length, string2_length)
End Function
Public Function BubbleSrt(ArrayIn, Ascending As Boolean) 'started as 1D, converted to 2D sort
'in 2D, the first array parameter guides the lbound/ubound. I forgot the syntax for array.index to
'make this more flexible.
'The first parameter is the one it is sorted on
Dim SrtTemp As Variant
Dim i As Long
Dim j As Long
If Ascending = True Then
For i = LBound(ArrayIn) To UBound(ArrayIn)
For j = i + 1 To UBound(ArrayIn)
If ArrayIn(i, 1) > ArrayIn(j, 1) Then
'the 2D conversion is just the second half of each line, moving the second parameter
SrtTemp = ArrayIn(j, 1): SrtTemp2 = ArrayIn(j, 2)
ArrayIn(j, 1) = ArrayIn(i, 1): ArrayIn(j, 2) = ArrayIn(i, 2)
ArrayIn(i, 1) = SrtTemp: ArrayIn(i, 2) = SrtTemp2
End If
Next j
Next i
Else
For i = LBound(ArrayIn) To UBound(ArrayIn)
For j = i + 1 To UBound(ArrayIn)
If ArrayIn(i, 1) < ArrayIn(j, 1) Then
'the 2D conversion is just the second half of each line, moving the second parameter
SrtTemp = ArrayIn(j, 1): SrtTemp2 = ArrayIn(j, 2)
ArrayIn(j, 1) = ArrayIn(i, 1): ArrayIn(j, 2) = ArrayIn(i, 2)
ArrayIn(i, 1) = SrtTemp: ArrayIn(i, 2) = SrtTemp2
End If
Next j
Next i
End If
BubbleSrt = ArrayIn
End Function