【发布时间】:2014-11-20 02:32:35
【问题描述】:
我是 SQL 新手,并试图加快存储过程中多次使用的查询。我正在尝试使用查询来匹配地点的名称。有些地方的名称长度不同,例如,同一个地址在一张表中可能称为“McDonalds Riccarton”,而在另一张表中可能称为“Mac Donalds Riccarton”。
我已将每个表中名称的每个单词拆分到单独的列中,即“McDonalds, Riccarton”和“Mac, Donalds, Riccarton”。这些列依次命名为 first(第一个单词)、second(第二个单词)等等......正如您将在我的查询中看到的那样。
通过使用直接比较或使用 soundex,我试图根据松散的匹配项来匹配它们,例如 soundex (mac) = soundex(macdonalds) 和 soundex(riccarton) = soundex(riccarton) 应该是一样的。
查询尝试将第一个单词与另一张表的所有列进行匹配,即 first 与 first、second、third、fourth 或 last 匹配......其中 last 是名称的最后一个单词,或者(当名称超过四个单词时)第四个单词之后的所有单词。
我对当前查询的结果很满意,只是它有点慢。将一个包含 50 个名称的表与另一个包含 600 个名称的表进行匹配时,大约需要 2 分钟。它显然必须逐个尝试每个“or”分支,匹配失败后再尝试下一个分支......这使它变慢。如果可能的话,我想避免使用游标。提前致谢。
-- Pair every candidate name (cpn) with every location (loc) whose coordinates
-- fall strictly inside the candidate's bounding box and whose name loosely
-- matches, then store the matching pairs.
--
-- Both inputs carry the name pre-split into word columns
-- (first, second, third, fourth, last) plus stringsize, which the rules below
-- compare against literal word counts (2, 3, more than 3).
--
-- NOTE(review): the insert has no target column list, so it silently depends on
-- the column order of dbo.MatchedCulture_Recreation_Sports. Add an explicit
-- list once the target column names are confirmed.
-- NOTE(review): if unused word columns are stored as NULL rather than '', the
-- plain equality tests below can never be true for those columns - confirm how
-- the split populates them.
insert into
    dbo.MatchedCulture_Recreation_Sports
select
    loc.id,
    loc.name,
    cpn.cpn_id,
    cpn.cpn_name
from
    #NZF_CPN_Culture_Recreation_Sports cpn
    inner join #Locations_Culture_Recreation_Sports loc
        on
        -- search within a distance: the location must lie strictly inside the
        -- candidate's min/max latitude/longitude box (edges excluded)
        (
            loc.latitude  < cpn.Maxlat and
            loc.latitude  > cpn.Minlat and
            loc.longitude < cpn.Maxlon and
            loc.longitude > cpn.Minlon
        )
        and
        (
            -- rule 1: same word count and every word column is identical
            (
                cpn.stringsize = loc.stringsize and
                cpn.first  = loc.first  and
                cpn.second = loc.second and
                cpn.third  = loc.third  and
                cpn.fourth = loc.fourth and
                cpn.last   = loc.last
            )
            or
            -- rule 2: different word counts, neither name is a single word, and
            -- the candidate's first, second and last words each appear exactly
            -- in some word column of the location
            (
                cpn.stringsize <> loc.stringsize and
                cpn.stringsize <> 1 and
                loc.stringsize <> 1 and
                cpn.first  in (loc.first, loc.second, loc.third, loc.fourth, loc.last) and
                cpn.last   in (loc.first, loc.second, loc.third, loc.fourth, loc.last) and
                cpn.second in (loc.first, loc.second, loc.third, loc.fourth, loc.last)
            )
            or
            -- rule 3: candidate has 2 words, location has 3 - both candidate
            -- words must sound like some location word
            (
                cpn.stringsize = 2 and
                loc.stringsize = 3 and
                SOUNDEX(cpn.first) in (SOUNDEX(loc.first), SOUNDEX(loc.second), SOUNDEX(loc.last)) and
                SOUNDEX(cpn.last)  in (SOUNDEX(loc.first), SOUNDEX(loc.second), SOUNDEX(loc.last))
            )
            or
            -- rule 4: mirror of rule 3 - candidate has 3 words, location has 2
            (
                cpn.stringsize = 3 and
                loc.stringsize = 2 and
                SOUNDEX(loc.first) in (SOUNDEX(cpn.first), SOUNDEX(cpn.second), SOUNDEX(cpn.last)) and
                SOUNDEX(loc.last)  in (SOUNDEX(cpn.first), SOUNDEX(cpn.second), SOUNDEX(cpn.last))
            )
            or
            -- rule 5: candidate has MORE than 3 words, location has 3 - every
            -- location word must sound like some candidate word.
            -- The comparison was previously "< 3", which contradicted this rule's
            -- stated intent and meant a 4+-word name could never reach it.
            (
                cpn.stringsize > 3 and
                loc.stringsize = 3 and
                SOUNDEX(loc.first)  in (SOUNDEX(cpn.first), SOUNDEX(cpn.second), SOUNDEX(cpn.third), SOUNDEX(cpn.last)) and
                SOUNDEX(loc.second) in (SOUNDEX(cpn.first), SOUNDEX(cpn.second), SOUNDEX(cpn.third), SOUNDEX(cpn.last)) and
                SOUNDEX(loc.last)   in (SOUNDEX(cpn.first), SOUNDEX(cpn.second), SOUNDEX(cpn.third), SOUNDEX(cpn.last))
            )
            or
            -- rule 6: mirror of rule 5 - candidate has 3 words, location has
            -- more than 3 (same "< 3" -> "> 3" correction)
            (
                cpn.stringsize = 3 and
                loc.stringsize > 3 and
                SOUNDEX(cpn.first)  in (SOUNDEX(loc.first), SOUNDEX(loc.second), SOUNDEX(loc.third), SOUNDEX(loc.last)) and
                SOUNDEX(cpn.second) in (SOUNDEX(loc.first), SOUNDEX(loc.second), SOUNDEX(loc.third), SOUNDEX(loc.last)) and
                SOUNDEX(cpn.last)   in (SOUNDEX(loc.first), SOUNDEX(loc.second), SOUNDEX(loc.third), SOUNDEX(loc.last))
            )
        )
【问题讨论】:
标签: sql gis geospatial spatial