使用 Lawrence Philips 创建的 Metaphone 功能,我们在姓名和地址匹配方面取得了良好的效果。它的工作方式与 Soundex 类似,但为整个值创建了声音/辅音模式。您可能会发现这与其他一些技术结合使用很有用,特别是如果您可以去除一些绒毛,例如“co”。和“公司”如其他 cmets 所述:
create function [dbo].[Metaphone](@str as nvarchar(70), @KeepNumeric as bit = 0)
returns nvarchar(25)
/*
Metaphone Algorithm
Created by Lawrence Philips.
Metaphone presented in article in "Computer Language" December 1990 issue.
*********** BEGIN METAPHONE RULES ***********
Lawrence Philips' RULES follow:
The 16 consonant sounds:
|--- ZERO represents "th"
|
B X S K J T F H L M N P R 0 W Y
Drop vowels
Exceptions:
Beginning of word: "ae-", "gn", "kn-", "pn-", "wr-" ----> drop first letter
Beginning of word: "wh-" ----> change to "w"
Beginning of word: "x" ----> change to "s"
Beginning of word: vowel or "H" + vowel ----> Keep it
Transformations:
B ----> B unless at the end of word after "m", as in "dumb", "McComb"
C ----> X (sh) if "-cia-" or "-ch-"
S if "-ci-", "-ce-", or "-cy-"
SILENT if "-sci-", "-sce-", or "-scy-"
K otherwise
K "-sch-"
D ----> J if in "-dge-", "-dgy-", or "-dgi-"
T otherwise
F ----> F
G ----> SILENT if "-gh-" and not at end or before a vowel
"-gn" or "-gned"
"-dge-" etc., as in above rule
J if "gi", "ge", "gy" if not double "gg"
K otherwise
H ----> SILENT if after vowel and no vowel follows
or "-ch-", "-sh-", "-ph-", "-th-", "-gh-"
H otherwise
J ----> J
K ----> SILENT if after "c"
K otherwise
L ----> L
M ----> M
N ----> N
P ----> F if before "h"
P otherwise
Q ----> K
R ----> R
S ----> X (sh) if "sh" or "-sio-" or "-sia-"
S otherwise
T ----> X (sh) if "-tia-" or "-tio-"
0 (th) if "th"
SILENT if "-tch-"
T otherwise
V ----> F
W ----> SILENT if not followed by a vowel
W if followed by a vowel
X ----> KS
Y ----> SILENT if not followed by a vowel
Y if followed by a vowel
Z ----> S
*/
as
begin
declare @Result varchar(25)
,@str3 char(3)
,@str2 char(2)
,@str1 char(1)
,@strp char(1)
,@strLen tinyint
,@cnt tinyint
set @strLen = len(@str)
set @cnt = 0
set @Result = ''
-- Preserve first 5 numeric values when required
if @KeepNumeric = 1
begin
set @Result = case when isnumeric(substring(@str,1,1)) = 1
then case when isnumeric(substring(@str,2,1)) = 1
then case when isnumeric(substring(@str,3,1)) = 1
then case when isnumeric(substring(@str,4,1)) = 1
then case when isnumeric(substring(@str,5,1)) = 1
then left(@str,5)
else left(@str,4)
end
else left(@str,3)
end
else left(@str,2)
end
else left(@str,1)
end
else ''
end
set @str = right(@str,len(@str)-len(@Result))
end
--Process beginning exceptions
set @str2 = left(@str,2)
if @str2 = 'wh'
begin
set @str = 'w' + right(@str , @strLen - 2)
set @strLen = @strLen - 1
end
else
if @str2 in('ae', 'gn', 'kn', 'pn', 'wr')
begin
set @str = right(@str , @strLen - 1)
set @strLen = @strLen - 1
end
set @str1 = left(@str,1)
if @str1 = 'x'
set @str = 's' + right(@str , @strLen - 1)
else
if @str1 in ('a','e','i','o','u')
begin
set @str = right(@str, @strLen - 1)
set @strLen = @strLen - 1
set @Result = @Result + @str1
end
while @cnt <= @strLen
begin
set @cnt = @cnt + 1
set @str1 = substring(@str,@cnt,1)
set @strp = case when @cnt <> 0
then substring(@str,(@cnt-1),1)
else ' '
end
-- Check if the current character is the same as the previous character.
-- If we are keeping numbers, only compare non-numeric characters.
if case when @KeepNumeric = 1 and @strp = @str1 and isnumeric(@str1) = 0 then 1
when @KeepNumeric = 0 and @strp = @str1 then 1
else 0
end = 1
continue -- Skip this loop
set @str2 = substring(@str,@cnt,2)
set @Result = case when @KeepNumeric = 1 and isnumeric(@str1) = 1
then @Result + @str1
when @str1 in('f','j','l','m','n','r')
then @Result + @str1
when @str1 = 'q'
then @Result + 'k'
when @str1 = 'v'
then @Result + 'f'
when @str1 = 'x'
then @Result + 'ks'
when @str1 = 'z'
then @Result + 's'
when @str1 = 'b'
then case when @cnt = @strLen
then case when substring(@str,(@cnt - 1),1) <> 'm'
then @Result + 'b'
else @Result
end
else @Result + 'b'
end
when @str1 = 'c'
then case when @str2 = 'ch' or substring(@str,@cnt,3) = 'cia'
then @Result + 'x'
else case when @str2 in('ci','ce','cy') and @strp <> 's'
then @Result + 's'
else @Result + 'k'
end
end
when @str1 = 'd'
then case when substring(@str,@cnt,3) in ('dge','dgy','dgi')
then @Result + 'j'
else @Result + 't'
end
when @str1 = 'g'
then case when substring(@str,(@cnt - 1),3) not in ('dge','dgy','dgi','dha','dhe','dhi','dho','dhu')
then case when @str2 in('gi', 'ge','gy')
then @Result + 'j'
else case when @str2 <> 'gn' or (@str2 <> 'gh' and @cnt+1 <> @strLen)
then @Result + 'k'
else @Result
end
end
else @Result
end
when @str1 = 'h'
then case when @strp not in ('a','e','i','o','u') and @str2 not in ('ha','he','hi','ho','hu')
then case when @strp not in ('c','s','p','t','g')
then @Result + 'h'
else @Result
end
else @Result
end
when @str1 = 'k'
then case when @strp <> 'c'
then @Result + 'k'
else @Result
end
when @str1 = 'p'
then case when @str2 = 'ph'
then @Result + 'f'
else @Result + 'p'
end
when @str1 = 's'
then case when substring(@str,@cnt,3) in ('sia','sio') or @str2 = 'sh'
then @Result + 'x'
else @Result + 's'
end
when @str1 = 't'
then case when substring(@str,@cnt,3) in ('tia','tio')
then @Result + 'x'
else case when @str2 = 'th'
then @Result + '0'
else case when substring(@str,@cnt,3) <> 'tch'
then @Result + 't'
else @Result
end
end
end
when @str1 = 'w'
then case when @str2 not in('wa','we','wi','wo','wu')
then @Result + 'w'
else @Result
end
when @str1 = 'y'
then case when @str2 not in('ya','ye','yi','yo','yu')
then @Result + 'y'
else @Result
end
else @Result
end
end
return @Result
end