【发布时间】:2011-12-24 13:02:55
【问题描述】:
我有一个查询如下(简化)...
SELECT *
FROM table1 AS a
INNER JOIN table2 AS b ON (a.name LIKE '%' + b.name + '%')
对于我的数据集,这大约需要 90 秒才能执行完,所以我一直在寻找加速它的方法。没有什么特别的理由,我只是想试试用 PATINDEX 代替 LIKE...
SELECT *
FROM table1 AS a
INNER JOIN table2 AS b ON (PATINDEX('%' + b.name + '%', a.name) > 0)
在同一数据集上,这会在眨眼间执行并返回相同的结果。
谁能解释为什么 LIKE 比 PATINDEX 慢得多?鉴于 LIKE 只返回一个 BOOLEAN,而 PATINDEX 要返回实际的匹配位置,我原以为后者会更慢。还是说,这仅仅是两个函数实现效率不同的问题?
好的,下面是每个查询的完整内容,后面各自附上其执行计划。“#StakeholderNames”只是一个临时表,存放我要匹配的候选名称。
我已把线上(生产)数据拉取下来,并多次运行了每个查询。第一个大约需要 17 秒(比线上数据库上原来的 90 秒要少一些),第二个不到 1 秒...
SELECT sh.StakeholderID,
sh.HoldingID,
i.AgencyCommissionImportID,
1
FROM AgencyCommissionImport AS i
INNER JOIN #StakeholderNames AS sn ON REPLACE(REPLACE(i.ClientName,' ',''), ',','') LIKE '%' + sn.Name + '%'
INNER JOIN Holding AS h ON (h.ProviderName = i.Provider) AND (h.HoldingReference = i.PlanNumber)
INNER JOIN StakeholderHolding AS sh ON (sn.StakeholderID = sh.StakeholderID) AND (h.HoldingID = sh.HoldingID)
WHERE i.AgencyCommissionFileID = @AgencyCommissionFileID
AND (i.MatchTypeID = 0)
AND ((i.MatchedHoldingID IS NULL)
OR (i.MatchedStakeholderID IS NULL))
|--Table Insert(OBJECT:([tempdb].[dbo].[#Results]), SET:([#Results].[StakeholderID] = [AttivoGroup_copy].[dbo].[StakeholderHolding].[StakeholderID] as [sh].[StakeholderID],[#Results].[HoldingID] = [AttivoGroup_copy].[dbo].[StakeholderHolding].[HoldingID] as [sh].[HoldingID],[#Results].[AgencyCommissionImportID] = [AttivoGroup_copy].[dbo].[AgencyCommissionImport].[AgencyCommissionImportID] as [i].[AgencyCommissionImportID],[#Results].[MatchTypeID] = [Expr1014],[#Results].[indx] = [Expr1013]))
|--Compute Scalar(DEFINE:([Expr1014]=(1)))
|--Compute Scalar(DEFINE:([Expr1013]=getidentity((1835869607),(2),N'#Results')))
|--Top(ROWCOUNT est 0)
|--Hash Match(Inner Join, HASH:([h].[ProviderName], [h].[HoldingReference])=([i].[Provider], [i].[PlanNumber]), RESIDUAL:([AttivoGroup_copy].[dbo].[Holding].[ProviderName] as [h].[ProviderName]=[AttivoGroup_copy].[dbo].[AgencyCommissionImport].[Provider] as [i].[Provider] AND [AttivoGroup_copy].[dbo].[Holding].[HoldingReference] as [h].[HoldingReference]=[AttivoGroup_copy].[dbo].[AgencyCommissionImport].[PlanNumber] as [i].[PlanNumber] AND [Expr1015] like [Expr1016]))
|--Nested Loops(Inner Join, OUTER REFERENCES:([sh].[HoldingID]))
| |--Nested Loops(Inner Join, OUTER REFERENCES:([sn].[StakeholderID]))
| | |--Compute Scalar(DEFINE:([Expr1016]=('%'+#StakeholderNames.[Name] as [sn].[Name])+'%', [Expr1017]=LikeRangeStart(('%'+#StakeholderNames.[Name] as [sn].[Name])+'%'), [Expr1018]=LikeRangeEnd(('%'+#StakeholderNames.[Name] as [sn].[Name])+'%'), [Expr1019]=LikeRangeInfo(('%'+#StakeholderNames.[Name] as [sn].[Name])+'%')))
| | | |--Table Scan(OBJECT:([tempdb].[dbo].[#StakeholderNames] AS [sn]))
| | |--Clustered Index Seek(OBJECT:([AttivoGroup_copy].[dbo].[StakeholderHolding].[PK_StakeholderHolding] AS [sh]), SEEK:([sh].[StakeholderID]=#StakeholderNames.[StakeholderID] as [sn].[StakeholderID]) ORDERED FORWARD)
| |--Clustered Index Seek(OBJECT:([AttivoGroup_copy].[dbo].[Holding].[PK_Holding] AS [h]), SEEK:([h].[HoldingID]=[AttivoGroup_copy].[dbo].[StakeholderHolding].[HoldingID] as [sh].[HoldingID]) ORDERED FORWARD)
|--Compute Scalar(DEFINE:([Expr1015]=replace(replace([AttivoGroup_copy].[dbo].[AgencyCommissionImport].[ClientName] as [i].[ClientName],' ',''),',','')))
|--Clustered Index Scan(OBJECT:([AttivoGroup_copy].[dbo].[AgencyCommissionImport].[PK_AgencyCommissionImport] AS [i]), WHERE:([AttivoGroup_copy].[dbo].[AgencyCommissionImport].[AgencyCommissionFileID] as [i].[AgencyCommissionFileID]=[@AgencyCommissionFileID] AND [AttivoGroup_copy].[dbo].[AgencyCommissionImport].[MatchTypeID] as [i].[MatchTypeID]=(0) AND ([AttivoGroup_copy].[dbo].[AgencyCommissionImport].[MatchedHoldingID] as [i].[MatchedHoldingID] IS NULL OR [AttivoGroup_copy].[dbo].[AgencyCommissionImport].[MatchedStakeholderID] as [i].[MatchedStakeholderID] IS NULL)))
SELECT sh.StakeholderID,
sh.HoldingID,
i.AgencyCommissionImportID,
1
FROM AgencyCommissionImport AS i
INNER JOIN #StakeholderNames AS sn ON (PATINDEX('%' + sn.Name + '%', REPLACE(REPLACE(i.ClientName,' ',''), ',','')) > 0)
INNER JOIN Holding AS h ON (h.ProviderName = i.Provider) AND (h.HoldingReference = i.PlanNumber)
INNER JOIN StakeholderHolding AS sh ON (sn.StakeholderID = sh.StakeholderID) AND (h.HoldingID = sh.HoldingID)
WHERE i.AgencyCommissionFileID = @AgencyCommissionFileID
AND (i.MatchTypeID = 0)
AND ((i.MatchedHoldingID IS NULL)
OR (i.MatchedStakeholderID IS NULL))
|--Table Insert(OBJECT:([tempdb].[dbo].[#Results]), SET:([#Results].[StakeholderID] = [AttivoGroup_copy].[dbo].[StakeholderHolding].[StakeholderID] as [sh].[StakeholderID],[#Results].[HoldingID] = [AttivoGroup_copy].[dbo].[StakeholderHolding].[HoldingID] as [sh].[HoldingID],[#Results].[AgencyCommissionImportID] = [AttivoGroup_copy].[dbo].[AgencyCommissionImport].[AgencyCommissionImportID] as [i].[AgencyCommissionImportID],[#Results].[MatchTypeID] = [Expr1014],[#Results].[indx] = [Expr1013]))
|--Compute Scalar(DEFINE:([Expr1014]=(1)))
|--Compute Scalar(DEFINE:([Expr1013]=getidentity((1867869721),(2),N'#Results')))
|--Top(ROWCOUNT est 0)
|--Hash Match(Inner Join, HASH:([h].[ProviderName], [h].[HoldingReference])=([i].[Provider], [i].[PlanNumber]), RESIDUAL:([AttivoGroup_copy].[dbo].[Holding].[ProviderName] as [h].[ProviderName]=[AttivoGroup_copy].[dbo].[AgencyCommissionImport].[Provider] as [i].[Provider] AND [AttivoGroup_copy].[dbo].[Holding].[HoldingReference] as [h].[HoldingReference]=[AttivoGroup_copy].[dbo].[AgencyCommissionImport].[PlanNumber] as [i].[PlanNumber] AND patindex([Expr1015],[Expr1016])>(0)))
|--Nested Loops(Inner Join, OUTER REFERENCES:([sh].[HoldingID]))
| |--Nested Loops(Inner Join, OUTER REFERENCES:([sn].[StakeholderID]))
| | |--Compute Scalar(DEFINE:([Expr1015]=('%'+#StakeholderNames.[Name] as [sn].[Name])+'%'))
| | | |--Table Scan(OBJECT:([tempdb].[dbo].[#StakeholderNames] AS [sn]))
| | |--Clustered Index Seek(OBJECT:([AttivoGroup_copy].[dbo].[StakeholderHolding].[PK_StakeholderHolding] AS [sh]), SEEK:([sh].[StakeholderID]=#StakeholderNames.[StakeholderID] as [sn].[StakeholderID]) ORDERED FORWARD)
| |--Clustered Index Seek(OBJECT:([AttivoGroup_copy].[dbo].[Holding].[PK_Holding] AS [h]), SEEK:([h].[HoldingID]=[AttivoGroup_copy].[dbo].[StakeholderHolding].[HoldingID] as [sh].[HoldingID]) ORDERED FORWARD)
|--Compute Scalar(DEFINE:([Expr1016]=replace(replace([AttivoGroup_copy].[dbo].[AgencyCommissionImport].[ClientName] as [i].[ClientName],' ',''),',','')))
|--Clustered Index Scan(OBJECT:([AttivoGroup_copy].[dbo].[AgencyCommissionImport].[PK_AgencyCommissionImport] AS [i]), WHERE:([AttivoGroup_copy].[dbo].[AgencyCommissionImport].[AgencyCommissionFileID] as [i].[AgencyCommissionFileID]=[@AgencyCommissionFileID] AND [AttivoGroup_copy].[dbo].[AgencyCommissionImport].[MatchTypeID] as [i].[MatchTypeID]=(0) AND ([AttivoGroup_copy].[dbo].[AgencyCommissionImport].[MatchedHoldingID] as [i].[MatchedHoldingID] IS NULL OR [AttivoGroup_copy].[dbo].[AgencyCommissionImport].[MatchedStakeholderID] as [i].[MatchedStakeholderID] IS NULL)))
【问题讨论】:
-
您是否检查了这两个查询的查询计划?另外,您使用的是哪种 SQL(SQLServer、MySQL、Oracle 等)?
-
90 秒 vs 眨眼之间,很可能说明执行计划有很大的不同(连接类型),或者发生了其他事情(阻塞,或者从磁盘读取与从缓存读取的差别)。我很难相信这纯粹是由
patindex 与 like 之间 CPU 时间的差异造成的。请发布执行计划以及 SET STATISTICS IO ON; SET STATISTICS TIME ON; 的输出 -
也许这是数据库缓存的问题?您是否尝试过在运行每个查询之前,先用 DBCC 命令重置缓存? (
DBCC DROPCLEANBUFFERS,DBCC FREEPROCCACHE) -
@Oleg - 前导通配符意味着索引也无济于事。
-
感谢大家的评论。回答一些问题:数据库是 SQL Server;我使用的数据集是我们的客户目前正在使用的线上数据(其性能问题已经修复),因此我必须先把它拉回我们的测试站点才能运行执行计划。完成后我会更新。
标签: sql sql-server string performance tsql