【发布时间】:2018-10-17 08:57:24
【问题描述】:
您好,我有以下 SQL 查询,平均需要 40 分钟才能运行,它引用的其中一个表中有超过 700 万条记录。
我已经用数据库引擎优化顾问(Database Engine Tuning Advisor)分析过这个查询并应用了它给出的所有建议,还在 SQL Server 的活动监视器中对其进行了评估,目前没有再提示需要添加索引等进一步的优化建议。
任何建议都会很棒,在此先感谢
-- Original version of the query as posted.
-- Purpose: collect Result rows that are eligible for a 'PICKUP' email, keep one row
-- per (Email, CandidateId), insert them into smtp_production, and record every
-- inserted ResultId together with GETDATE() in ResultstoUpdate.
WITH CTE AS
(
-- Candidate rows: each Result joined to its Job and to the User that owns the Job.
-- EmailSent, EmailSentDate, EmailStatus, CreateDate and Subject are constants here,
-- not values read from Result.
SELECT r.Id AS ResultId,
r.JobId,
r.CandidateId,
r.Email,
CAST(0 AS BIT) AS EmailSent,
NULL AS EmailSentDate,
'PICKUP' AS EmailStatus,
GETDATE() AS CreateDate,
C.Id AS UserId,
C.Email AS UserEmail,
NULL AS Subject
FROM Result R
INNER JOIN Job J ON R.JobId = J.Id
INNER JOIN User C ON J.UserId = C.Id
WHERE
-- NOTE(review): every ISNULL(...) below wraps the filtered column in a function,
-- which makes the predicate non-SARGable (an index on the column cannot be used
-- for a seek). This is the issue the discussion under the post calls out.
-- Job must be approved (NULL counts as not approved) and not closed (NULL counts as open).
ISNULL(J.Approved, CAST(0 AS BIT)) = CAST(1 AS BIT)
AND ISNULL(J.Closed, CAST(0 AS BIT)) = CAST(0 AS BIT)
AND ISNULL(R.Email,'') <> '' -- has an email address
AND ISNULL(R.EmailSent, CAST(0 AS BIT)) = CAST(0 AS BIT) -- email has not been sent
AND R.EmailSentDate IS NULL -- email has not been sent
AND ISNULL(R.EmailStatus,'') = '' -- email has not been sent
AND ISNULL(R.IsEmailSubscribe, 'True') <> 'False' -- not unsubscribed
-- not already been emailed for this job
AND NOT EXISTS (
SELECT SMTP.Email
FROM SMTP_Production SMTP
WHERE SMTP.JobId = R.JobId AND SMTP.CandidateId = R.CandidateId
)
-- not unsubscribed
-- NULL and '' are treated as the same address on both sides of this comparison.
AND NOT EXISTS (
SELECT u.Id FROM Unsubscribe u
WHERE ISNULL(u.EmailAddress, '') = ISNULL(R.Email, '')
)
-- candidate has no row in SMTP_Production that is still in 'PICKUP' status (any job)
AND NOT EXISTS (
SELECT SMTP.Id FROM SMTP_Production SMTP
WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId
)
-- The three NOT IN lists below are exclusion lists for users, jobs and clients;
-- their contents are not shown in the post.
AND C.Id NOT IN (
-- list of ids
)
AND J.Id NOT IN (
-- list of ids
)
AND J.ClientId NOT IN
(
-- list of ids
)
)
-- CTE.UserId / CTE.UserEmail are written to the ConsultantId / ConsultantEmail columns.
INSERT INTO smtp_production (ResultId, JobId, CandidateId, Email, EmailSent, EmailSentDate, EmailStatus, CreateDate, ConsultantId, ConsultantEmail, Subject)
-- Each inserted ResultId is logged with the current time into ResultstoUpdate.
OUTPUT INSERTED.ResultId,GETDATE() INTO ResultstoUpdate
SELECT
CTE.ResultId,
CTE.JobId,
CTE.CandidateId,
CTE.Email,
CTE.EmailSent,
CTE.EmailSentDate,
CTE.EmailStatus,
CTE.CreateDate,
CTE.UserId,
CTE.UserEmail,
NULL
FROM CTE
-- De-duplication: keep only the row numbered 1 within each (Email, CandidateId) group.
-- EmailSentDate is the constant NULL for every CTE row, so the ORDER BY does not
-- distinguish rows and the row that gets rn = 1 is not deterministic.
-- NOTE(review): CTE is referenced twice (here and in the derived table); the CTE
-- body may therefore be evaluated twice — confirm in the execution plan.
INNER JOIN
(
SELECT *, row_number() over(partition by CTE.Email, CTE.CandidateId order by CTE.EmailSentDate desc) as rn
FROM CTE
) DCTE ON CTE.ResultId = DCTE.ResultId AND DCTE.rn = 1
请在下面查看我更新后的查询:
-- Queue one 'PICKUP' email per (Email, CandidateId) for results that belong to
-- approved, open jobs, and log each inserted ResultId with the current time into
-- ResultstoUpdate.
--
-- Changes relative to the previously posted revision of this query:
--   * The email filter is now `R.Email <> ''`. The earlier form
--     `(R.Email <> '' OR R.Email IS NOT NULL)` let empty strings through, because
--     '' IS NOT NULL is true. `NULL <> ''` evaluates to UNKNOWN, so NULL emails are
--     still rejected without an explicit IS NOT NULL test.
--   * The Unsubscribe lookup is a plain equality. R.Email can no longer be NULL once
--     it has passed the filter above, so the "both sides NULL" branch could never match.
--   * Ranking is done in a second CTE that reads CTE once, instead of joining CTE
--     back to a ranked copy of itself (which referenced the expensive CTE twice).
--   * ROW_NUMBER is ordered by ResultId. EmailSentDate is the constant NULL on every
--     CTE row, so ordering by it picked an arbitrary row per group.
WITH CTE AS
(
    -- Candidate rows: each result joined to its job and the consultant owning the job.
    SELECT
        R.Id           AS ResultId,
        R.JobId,
        R.CandidateId,
        R.Email,
        CAST(0 AS BIT) AS EmailSent,
        NULL           AS EmailSentDate,
        'PICKUP'       AS EmailStatus,
        GETDATE()      AS CreateDate,
        C.Id           AS UserId,
        C.Email        AS UserEmail,
        NULL           AS Subject
    FROM RESULTS R
    INNER JOIN JOB J ON R.JobId = J.Id
    INNER JOIN Consultant C ON J.UserId = C.Id
    WHERE
        J.DCApproved = 1
        AND (J.Closed = 0 OR J.Closed IS NULL)
        AND R.Email <> '' -- has an email address (rejects both NULL and '')
        AND (R.EmailSent = 0 OR R.EmailSent IS NULL) -- email has not been sent
        AND R.EmailSentDate IS NULL -- email has not been sent
        AND (R.EmailStatus = '' OR R.EmailStatus IS NULL) -- email has not been sent
        -- NOTE(review): assumes IsEmailSubscribe only ever holds 'True', 'False' or NULL — confirm
        AND (R.IsEmailSubscribe = 'True' OR R.IsEmailSubscribe IS NULL)
        -- not already been emailed for this job
        AND NOT EXISTS (
            SELECT 1
            FROM SMTP_Production SMTP
            WHERE SMTP.JobId = R.JobId AND SMTP.CandidateId = R.CandidateId
        )
        -- not unsubscribed
        AND NOT EXISTS (
            SELECT 1
            FROM Unsubscribe u
            WHERE u.EmailAddress = R.Email
        )
        -- candidate has no email still waiting in 'PICKUP' status (for any job)
        AND NOT EXISTS (
            SELECT 1
            FROM SMTP_Production SMTP
            WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId
        )
        -- exclusion lists for consultants, jobs and clients (contents omitted in the post)
        AND C.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.ClientId NOT IN
        (
            -- LIST OF IDS
        )
),
Ranked AS
(
    -- Number the candidate rows within each (Email, CandidateId) group so that exactly
    -- one row per group can be kept. Assumes ResultId is unique within CTE — confirm.
    SELECT
        CTE.ResultId,
        CTE.JobId,
        CTE.CandidateId,
        CTE.Email,
        CTE.EmailSent,
        CTE.EmailSentDate,
        CTE.EmailStatus,
        CTE.CreateDate,
        CTE.UserId,
        CTE.UserEmail,
        ROW_NUMBER() OVER (PARTITION BY CTE.Email, CTE.CandidateId ORDER BY CTE.ResultId DESC) AS rn
    FROM CTE
)
INSERT INTO smtp_production (ResultId, JobId, CandidateId, Email, EmailSent, EmailSentDate, EmailStatus, CreateDate, UserId, UserEmail, Subject)
OUTPUT INSERTED.ResultId, GETDATE() INTO ResultstoUpdate
SELECT
    Ranked.ResultId,
    Ranked.JobId,
    Ranked.CandidateId,
    Ranked.Email,
    Ranked.EmailSent,
    Ranked.EmailSentDate,
    Ranked.EmailStatus,
    Ranked.CreateDate,
    Ranked.UserId,
    Ranked.UserEmail,
    NULL
FROM Ranked
WHERE Ranked.rn = 1
GO
【问题讨论】:
-
在 WHERE 子句中使用 ISNULL 会破坏查询的 SARGability(即无法再利用索引进行查找)。不要在 WHERE 中使用它,请改用 ({Expression} = {Value} OR {Expression} IS NULL) 这种写法。 -
@Larnu 我相信这条评论本身就足以成为一个答案。
-
@GeorgeMenoutis 你可能是对的。我想先查看查询的其余部分,但看到那是一个非常早的地方。
-
子句
R.Email <> '' OR R.Email IS NOT NULL 有点多余。如果 R.Email 的值为 '',那么它的值就不是 NULL,因此整个条件会评估为真(也就是说,值为 '' 的行也会被返回)。您在这里可能只需要 R.Email <> '',因为 NULL <> '' 的评估结果是未知(UNKNOWN),而未知并不等于真,所以 NULL 值本来就会被排除。 -
即使现在只运行 SELECT 语句(例如 SELECT CTE.ResultId, ... FROM CTE),也会导致我的服务器内存占用达到 100%。
标签: sql sql-server database database-performance sqlperformance