【问题标题】:MySQL Query (Sub Queries + Composed Functions + JOIN operations) takes too long to run(MySQL 查询(子查询 + 组合函数 + JOIN 操作)运行时间过长)
【发布时间】:2021-10-16 08:14:18
【问题描述】:

如何修改下面这个查询?它由调用函数的子查询组成,而这些函数内部又包含联接查询。我想在主查询中附加一些额外的值,这些值依赖于主表以及两个主要的关联表(风险 risks、用户 users)。

事实证明,创建一个 MCVRE(最小、完整、可验证、可重现的示例)有点挑战性,因为发送到 SQL Fiddle 的请求行数太多(文本字符太多)。在删除两个主要表(users、risks)中几乎所有的行之后,我才得到一个可以运行的查询。

Fiddle (http://www.sqlfiddle.com/#!9/1d52a0/17):其中创建函数和插入数据的命令所含的行数比我本地 PC 上的实际示例少,因为 SQL Fiddle 无法处理超过 8000 个字符的请求负载。

实际的表大约有 100 行风险,20 左右的用户行,运行大约需要 3 秒

我能做些什么来加快查询速度:将所需的函数结果保存在表中、修订查询、添加索引、将连接移到外层主查询、使用存储过程,或者重写查询结构,从而把执行时间减少到一半左右,甚至更少?SQL Fiddle 容纳不下所需的全部行,所以我只粘贴了一个非常有限的子集;即便如此,SQL Fiddle 上的查询(请参阅下面完整的 SELECT 查询)也因为堆栈溢出(Stack Overflow,算是半个双关语)而无法运行。

http://www.sqlfiddle.com/#!9/1d52a0/17

可以在 SQL Fiddle 上运行的基本查询(参见 fiddle)

-- Dump every table the report query reads, to check the fiddle's seed data.
SELECT u.*   FROM users                AS u;
SELECT r.*   FROM risks                AS r;
SELECT e.*   FROM riskevents           AS e;
SELECT m.*   FROM riskmatrixthresholds AS m;
SELECT lvl.* FROM risklevels           AS lvl;

# 已大幅精简结果集,但由于 SQL Fiddle 上的堆栈溢出问题,查询仍然无法运行——请参阅 fiddle 结果(位于 fiddle 查询输出的最底部)

-- Risk report: one row per risk with its stored attributes, the names of the
-- creator / owner / approver, the original assessment taken from the risk row,
-- and the values computed by the Current* / LatestEventDate functions.
SELECT
    r.RiskID,
    r.CreatorID,
    r.OwnerID,
    r.ApproverID,
    r.RiskTitle,
    r.RiskStatement,
    r.ClosureCriteria,
    r.RiskState,
    r.Context AS `Context`,

    -- People: resolve each role to a user id, then to a last / first name.
    GetRiskUserLastOrFirstName(GetRiskUserID('Creator',  r.RiskID, 0), r.RiskID, 'Last',  '') AS `creator.lastname`,
    GetRiskUserLastOrFirstName(GetRiskUserID('Creator',  r.RiskID, 0), r.RiskID, 'First', '') AS `creator.firstname`,
    GetRiskUserLastOrFirstName(GetRiskUserID('Owner',    r.RiskID, 0), r.RiskID, 'Last',  '') AS `owner.lastname`,
    GetRiskUserLastOrFirstName(GetRiskUserID('Owner',    r.RiskID, 0), r.RiskID, 'First', '') AS `owner.firstname`,
    GetRiskUserLastOrFirstName(GetRiskUserID('Approver', r.RiskID, 0), r.RiskID, 'Last',  '') AS `approver.lastname`,
    GetRiskUserLastOrFirstName(GetRiskUserID('Approver', r.RiskID, 0), r.RiskID, 'First', '') AS `approver.firstname`,

    -- Original assessment, straight from the risk row.
    r.Likelihood AS `OriginalLikelihood`,
    r.Technical  AS `OriginalTechnical`,
    r.Schedule   AS `OriginalSchedule`,
    r.Cost       AS `OriginalCost`,
    GREATEST(r.Technical, r.Schedule, r.Cost) AS `OriginalConsequence`,
    RiskValue(r.Likelihood, GREATEST(r.Technical, r.Schedule, r.Cost), 0) AS `OriginalValue`,
    RiskLevel(RiskValue(r.Likelihood, GREATEST(r.Technical, r.Schedule, r.Cost), 0), '') AS `OriginalLevel`,

    -- Dates.
    LatestEventDate(r.RiskID, r.AssessmentDate, '') AS `LatestEventDate`,
    r.AssessmentDate AS `AssessmentDate`,

    -- Current assessment, as computed by the Current* functions.
    CurrentLikelihood(r.RiskID, 0)  AS `CurrentLikelihood`,
    CurrentConsequence(r.RiskID, 0) AS `CurrentConsequence`,
    CurrentRiskValue(r.RiskID, 0)   AS `CurrentValue`,
    RiskLevel(CurrentRiskValue(r.RiskID, 0), '') AS `CurrentLevel`
FROM risks AS r;

建表、插入数据和创建函数的脚本

   -- Events attached to a risk (RiskID). Each event carries three dates
   -- (actual / schedule / baseline) and, for each of those, a likelihood plus
   -- technical / schedule / cost scores. ID is the surrogate primary key;
   -- EventID is the event number the stored functions look up per risk.
   CREATE TABLE `riskevents` (
     `ID` int NOT NULL AUTO_INCREMENT,
     `EventID` int ,
     `RiskID` int ,
     `EventTitle` text,
     `EventStatus` varchar(10) ,
     `EventOwnerID` int ,
     `ActualDate` date ,
     `ScheduleDate` date ,
     `BaselineDate` date ,
     `ActualLikelihood` int ,
     `ActualTechnical` int ,
     `ActualSchedule` int ,
     `ActualCost` int ,
     `ScheduledLikelihood` int ,
     `ScheduledTechnical` int ,
     `ScheduledSchedule` int ,
     `ScheduledCost` int ,
     `BaselineLikelihood` int ,
     `BaselineTechnical` int ,
     `BaselineSchedule` int ,
     `BaselineCost` int ,
     PRIMARY KEY (`ID`)
   );  -- terminator added: without it this statement runs into the next CREATE TABLE
-- Threshold fractions for classifying a risk value. RiskLevel() reads
-- RiskHigh and RiskMedium (scaled by 100) to label a value High / Med / Low.
CREATE TABLE `risklevels` (
  `ID` int NOT NULL AUTO_INCREMENT,
  `RiskLevelID` int ,
  `RiskMaximum` float ,
  `RiskHigh` float ,
  `RiskMedium` float ,
  `RiskMinimum` float ,
  PRIMARY KEY (`ID`)
);  -- terminator added so the script can be run statement by statement

-- Risk matrix: one cell per (Likelihood, Consequence) pair.
-- Level is a fraction (decimal(2,2), i.e. at most 0.99); RiskValue() returns it times 100.
CREATE TABLE `riskmatrixthresholds` (
  `ID` int NOT NULL AUTO_INCREMENT,
  `CellID` int ,
  `Likelihood` int ,
  `Consequence` int ,
  `Level` decimal(2,2) ,
  PRIMARY KEY (`ID`)
);  -- terminator added so the script can be run statement by statement

-- Risk register. ID is the surrogate primary key; RiskID is the unique
-- identifier the stored functions and the report filter on.
-- CreatorID / OwnerID / ApproverID are matched against users.UserID by GetRiskUserID().
-- Likelihood, Technical, Schedule and Cost hold the scores stored on the risk itself.
CREATE TABLE `risks` (
  `ID` int NOT NULL AUTO_INCREMENT,
  `RiskState` varchar(10) ,
  `RiskID` int ,
  `RiskTitle` text CHARACTER SET latin1,
  `RiskStatement` text CHARACTER SET latin1,
  `ApproverID` int ,
  `OwnerID` int ,
  `CreatorID` int ,
  `Likelihood` int ,
  `Technical` int ,
  `Schedule` int ,
  `Cost` int ,
  `ClosureCriteria` text CHARACTER SET latin1,
  `CategoryID` int ,
  `AssessmentDate` date ,
  `CompletionDate` date ,
  `ClosureDate` date ,
  `Context` text,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `risk_index` (`RiskID`)
);  -- terminator added so the script can be run statement by statement

-- People who can be referenced from a risk. ID is the surrogate primary key;
-- UserID is the unique identifier the risk columns and functions match on.
CREATE TABLE `users` (
  `ID` int NOT NULL AUTO_INCREMENT,
  `UserID` int NOT NULL,
  `LastName` char(25) ,
  `FirstName` char(15) ,
  `Title` char(20) ,
  `Email` varchar(30) ,
  `Phone` char(12) ,
  `Extension` char(4) ,
  `Department` char(25) ,
  PRIMARY KEY (`ID`),
  -- The former KEY `SURROGATE` (`UserID`) was dropped: it indexed exactly the
  -- same column as this unique index, adding write cost and no extra lookup path.
  UNIQUE KEY `user_index` (`UserID`)
);  -- terminator added so the script can be run statement by statement

-- Seed rows for riskevents (reduced sample).
-- Fix: the last row used to end with "));" - the extra ")" was a syntax error.
INSERT INTO `riskevents`
    (`ID`,`EventID`,`RiskID`,`EventTitle`,`EventStatus`,`EventOwnerID`,
     `ActualDate`,`ScheduleDate`,`BaselineDate`,
     `ActualLikelihood`,`ActualTechnical`,`ActualSchedule`,`ActualCost`,
     `ScheduledLikelihood`,`ScheduledTechnical`,`ScheduledSchedule`,`ScheduledCost`,
     `BaselineLikelihood`,`BaselineTechnical`,`BaselineSchedule`,`BaselineCost`)
VALUES
(171,0,1,'Risk','Complete',5,'2019-06-14',NULL,'2019-06-14',5,2,2,5,NULL,NULL,NULL,NULL,5,2,2,5),
(184,0,10,'Risk','Complete',21,'2019-10-07',NULL,'2019-10-07',5,4,5,4,NULL,NULL,NULL,NULL,5,4,5,4);

-- Seed row for risklevels: High starts at 0.55, Medium at 0.3.
INSERT INTO risklevels
    (ID, RiskLevelID, RiskMaximum, RiskHigh, RiskMedium, RiskMinimum)
VALUES
    (1, 1, 1, 0.55, 0.3, 0);

-- Seed rows for the 5 x 5 risk matrix: one Level per (Likelihood, Consequence) cell.
INSERT INTO riskmatrixthresholds
    (ID, CellID, Likelihood, Consequence, Level)
VALUES
    -- Likelihood 1
    ( 1,  1, 1, 1, 0.09),
    ( 2,  2, 1, 2, 0.12),
    ( 3,  3, 1, 3, 0.16),
    ( 4,  4, 1, 4, 0.19),
    ( 5,  5, 1, 5, 0.23),
    -- Likelihood 2
    ( 6,  6, 2, 1, 0.12),
    ( 7,  7, 2, 2, 0.19),
    ( 8,  8, 2, 3, 0.27),
    ( 9,  9, 2, 4, 0.34),
    (10, 10, 2, 5, 0.41),
    -- Likelihood 3
    (11, 11, 3, 1, 0.16),
    (12, 12, 3, 2, 0.27),
    (13, 13, 3, 3, 0.37),
    (14, 14, 3, 4, 0.48),
    (15, 15, 3, 5, 0.59),
    -- Likelihood 4
    (16, 16, 4, 1, 0.19),
    (17, 17, 4, 2, 0.34),
    (18, 18, 4, 3, 0.48),
    (19, 19, 4, 4, 0.63),
    (20, 20, 4, 5, 0.77),
    -- Likelihood 5
    (21, 21, 5, 1, 0.23),
    (22, 22, 5, 2, 0.41),
    (23, 23, 5, 3, 0.59),
    (24, 24, 5, 4, 0.77),
    (25, 25, 5, 5, 0.95);

-- Seed rows for risks (reduced sample).
-- Fix: the statement used to end with a dangling "," and no terminator.
INSERT INTO `risks`
    (`ID`,`RiskState`,`RiskID`,`RiskTitle`,`RiskStatement`,
     `ApproverID`,`OwnerID`,`CreatorID`,
     `Likelihood`,`Technical`,`Schedule`,`Cost`,
     `ClosureCriteria`,`CategoryID`,`AssessmentDate`,`CompletionDate`,`ClosureDate`,`Context`)
VALUES
(1,'Completed',1,'t','t',1,5,1,5,2,2,5,'t',NULL,'2019-06-14','2020-09-26',NULL,'t'),
(2,'Completed',2,'t','t',2,1,1,5,3,4,2,'test',NULL,'2019-05-14',NULL,NULL,'t');

-- Seed rows for users (reduced sample).
INSERT INTO users
    (ID, UserID, LastName, FirstName, Title, Email, Phone, Extension, Department)
VALUES
    (1, 1, 'Admin', '',      'Admin',    'a@yz.com', '17890',  '1234', ''),
    (2, 2, 'Last',  'First', 'Engineer', 'a@yz.com', '123890', '1234', 'Supplier');

-- Collapses the three impact scores into one consequence score: the largest of them.
--   technical, sched, cost : scores to compare.
--   consequence            : unused; kept so existing four-argument calls keep working.
-- Returns NULL when any of the three scores is NULL (GREATEST propagates NULL).
-- Declared DETERMINISTIC NO SQL: the result depends only on the arguments, and
-- MySQL rejects CREATE FUNCTION without such a characteristic when binary
-- logging is enabled (ERROR 1418).
CREATE FUNCTION Consequence(technical int, sched int, cost int, consequence int) RETURNS int
DETERMINISTIC
NO SQL
BEGIN
    RETURN GREATEST(technical, sched, cost);
END;

-- Returns the highest EventID among the events of a risk whose status is not 'Open'.
--   riskidentifier : risks.RiskID / riskevents.RiskID to look at.
--   eid            : scratch parameter that receives the result (callers pass 0).
-- Returns NULL when the risk has no such event (MAX over zero rows is NULL).
-- Fix: the original returned the undeclared name "riskeventid" instead of "eid".
-- READS SQL DATA is declared so the function can be created when binary logging
-- is enabled (ERROR 1418 otherwise).
CREATE FUNCTION CurrentRiskEventID(riskidentifier int, eid int) RETURNS int
READS SQL DATA
BEGIN
    SELECT MAX(e.EventID)
    INTO   eid
    FROM   riskevents AS e
    WHERE  e.RiskID = riskidentifier
      AND  e.EventStatus <> 'Open';

    RETURN eid;
END;

-- Returns the current consequence score of a risk: GREATEST(technical, schedule, cost)
-- of its latest non-open event when all three actual scores are present,
-- otherwise the same expression over the scores stored on the risk row.
--   riskidentifier     : risks.RiskID to evaluate.
--   currentconsequence : scratch parameter that receives the result (callers pass 0).
-- Fixes:
--   * "ANDactualschedule" / "andactualschedule" lacked a space (syntax error);
--   * the schedule check was duplicated and the cost check was missing;
--   * the event was matched on the surrogate ID although CurrentRiskEventID()
--     returns an EventID; it is now matched on (EventID, RiskID), the same way
--     LatestEventDate() does;
--   * the event id is resolved once into a local variable instead of calling
--     the function from inside the WHERE clause.
CREATE FUNCTION CurrentConsequence(riskidentifier int, currentconsequence int) RETURNS int
READS SQL DATA
BEGIN
    DECLARE latest_event_id int;

    SET latest_event_id = CurrentRiskEventID(riskidentifier, 0);

    SELECT COALESCE(
               (SELECT GREATEST(e.ActualTechnical, e.ActualSchedule, e.ActualCost)
                FROM   riskevents AS e
                WHERE  e.EventID = latest_event_id
                  AND  e.RiskID  = riskidentifier
                  AND  e.ActualTechnical IS NOT NULL
                  AND  e.ActualSchedule  IS NOT NULL
                  AND  e.ActualCost      IS NOT NULL),
               (SELECT GREATEST(r.Technical, r.Schedule, r.Cost)
                FROM   risks AS r
                WHERE  r.RiskID = riskidentifier)
           )
    INTO currentconsequence;

    RETURN currentconsequence;
END;

-- Returns the current likelihood of a risk: ActualLikelihood of its latest
-- non-open event, falling back to the likelihood stored on the risk row when
-- there is no such event or its ActualLikelihood is NULL.
--   riskidentifier    : risks.RiskID to evaluate.
--   currentlikelihood : scratch parameter that receives the result (callers pass 0).
-- Fixes: the event was matched on the surrogate ID although CurrentRiskEventID()
-- returns an EventID; it is now matched on (EventID, RiskID). The event id is
-- resolved once instead of from inside the WHERE clause.
CREATE FUNCTION CurrentLikelihood(riskidentifier int, currentlikelihood int) RETURNS int
READS SQL DATA
BEGIN
    DECLARE latest_event_id int;

    SET latest_event_id = CurrentRiskEventID(riskidentifier, 0);

    SELECT COALESCE(
               (SELECT e.ActualLikelihood
                FROM   riskevents AS e
                WHERE  e.EventID = latest_event_id
                  AND  e.RiskID  = riskidentifier),
               (SELECT r.Likelihood
                FROM   risks AS r
                WHERE  r.RiskID = riskidentifier)
           )
    INTO currentlikelihood;

    RETURN currentlikelihood;
END;

-- Returns the level label ('High' / 'Med' / 'Low') for a risk's current risk value.
--   riskidentifier   : risks.RiskID to evaluate.
--   currentrisklevel : unused; kept so existing two-argument calls keep working.
-- Fix: the function was declared RETURNS int and stored the label in an int
-- parameter, so the text produced by RiskLevel() could never be returned
-- (conversion error in strict mode, 0 otherwise). It now returns varchar(4),
-- the type RiskLevel() itself returns.
CREATE FUNCTION CurrentRiskLevel(riskidentifier int, currentrisklevel int) RETURNS varchar(4)
READS SQL DATA
BEGIN
    RETURN RiskLevel(CurrentRiskValue(riskidentifier, 0), '');
END;

-- Returns the risk value for a risk's current likelihood and current consequence,
-- i.e. RiskValue(CurrentLikelihood(risk), CurrentConsequence(risk)).
--   riskidentifier   : risks.RiskID to evaluate.
--   currentriskvalue : unused; kept so existing two-argument calls keep working.
-- READS SQL DATA is declared so the function can be created when binary logging
-- is enabled (ERROR 1418 otherwise); the result is returned directly instead of
-- going through SELECT ... INTO.
CREATE FUNCTION CurrentRiskValue(riskidentifier int, currentriskvalue int) RETURNS int
READS SQL DATA
BEGIN
    RETURN RiskValue(
               CurrentLikelihood(riskidentifier, 0),
               CurrentConsequence(riskidentifier, 0),
               0);
END;

-- Returns the UserID of the person holding a given role on a risk, provided
-- that person exists in users.
--   riskusertype   : 'Creator', 'Approver' or 'Owner'.
--   riskidentifier : risks.RiskID to look at.
--   riskuserid     : value returned when no matching user is found (callers pass 0).
-- Changes: the comma join, the wrapping derived table and the "(select param)"
-- subqueries are replaced by one explicit join; the role is picked with CASE.
-- A miss now falls through COALESCE instead of raising a "No data" condition
-- from SELECT ... INTO; the returned value is the same.
CREATE FUNCTION GetRiskUserID(riskusertype VARCHAR(25), riskidentifier int, riskuserid int) RETURNS int
READS SQL DATA
BEGIN
    RETURN COALESCE(
               (SELECT u.UserID
                FROM   risks AS r
                INNER JOIN users AS u
                        ON u.UserID = CASE riskusertype
                                          WHEN 'Creator'  THEN r.CreatorID
                                          WHEN 'Approver' THEN r.ApproverID
                                          WHEN 'Owner'    THEN r.OwnerID
                                      END
                WHERE  r.RiskID = riskidentifier),
               riskuserid);
END;
-- Returns the last or first name of a user, but only if the given risk exists.
--   riskuserid      : users.UserID to look up.
--   riskid          : risks.RiskID that must exist for a name to be returned.
--   whichname       : 'Last' or 'First'; any other value yields NULL for a found user.
--   firstorlastname : value returned when the user or the risk is not found
--                     (callers pass '').
-- Changes: the comma join to risks only acted as an existence check, so it is
-- written as EXISTS; READS SQL DATA is declared so the function can be created
-- when binary logging is enabled (ERROR 1418 otherwise).
-- NOTE: the unqualified name "riskid" below is the parameter, not the column -
-- inside a stored program a variable wins over a same-named column. Keep the
-- column qualified (r.RiskID) or the comparison becomes parameter = parameter.
CREATE FUNCTION GetRiskUserLastOrFirstName(riskuserid int, riskid int, whichname char(25), firstorlastname char(25)) RETURNS char(25) CHARSET utf8 COLLATE utf8_unicode_ci
READS SQL DATA
BEGIN
    SELECT CASE whichname
               WHEN 'Last'  THEN u.LastName
               WHEN 'First' THEN u.FirstName
           END
    INTO   firstorlastname
    FROM   users AS u
    WHERE  u.UserID = riskuserid
      AND  EXISTS (SELECT 1 FROM risks AS r WHERE r.RiskID = riskid);

    RETURN firstorlastname;
END;

-- Returns the ActualDate of a risk's latest non-open event, or the supplied
-- assessment date when there is no such event or its ActualDate is NULL.
--   riskidentifier     : risks.RiskID to evaluate.
--   riskassessmentdate : fallback date.
--   latestdate         : unused; kept so existing three-argument calls keep working.
-- Changes: the event id is resolved once into a local variable instead of
-- calling CurrentRiskEventID() from inside the WHERE clause; READS SQL DATA is
-- declared so the function can be created when binary logging is enabled.
CREATE FUNCTION LatestEventDate(riskidentifier int, riskassessmentdate date, latestdate date) RETURNS date
READS SQL DATA
BEGIN
    DECLARE latest_event_id int;

    SET latest_event_id = CurrentRiskEventID(riskidentifier, 0);

    RETURN COALESCE(
               (SELECT evt.ActualDate
                FROM   riskevents AS evt
                WHERE  evt.EventID = latest_event_id
                  AND  evt.RiskID  = riskidentifier),
               riskassessmentdate);
END;

-- Classifies a risk value (0-100 scale) as 'High', 'Med' or 'Low' using the
-- thresholds in risklevels (stored as fractions, hence the * 100).
--   riskvalue : value to classify.
--   risklevel : value returned unchanged when risklevels has no row (callers pass '').
-- SELECT ... INTO requires risklevels to hold at most one row; more than one
-- row raises an error, as it did before.
-- Fix: RiskHigh / RiskMedium are FLOAT columns, so e.g. a stored 0.55 times 100
-- evaluates to slightly more than 55 and an int value of exactly 55 failed the
-- ">=" test. The scaled threshold is rounded to 4 decimals to remove that noise.
-- READS SQL DATA is declared so the function can be created when binary logging
-- is enabled (ERROR 1418 otherwise).
CREATE FUNCTION RiskLevel(riskvalue int, risklevel varchar(4)) RETURNS varchar(4)
READS SQL DATA
BEGIN
    SELECT CASE
               WHEN riskvalue >= ROUND(levels.RiskHigh   * 100, 4) THEN 'High'
               WHEN riskvalue >= ROUND(levels.RiskMedium * 100, 4) THEN 'Med'
               ELSE 'Low'
           END
    INTO   risklevel
    FROM   risklevels AS levels;

    RETURN risklevel;
END;

-- Looks up the risk matrix cell for a (likelihood, consequence) pair and
-- returns its Level scaled by 100.
--   likelihood, consequence : cell coordinates.
--   riskvalue               : value returned unchanged when no cell matches (callers pass 0).
-- NOTE: the parameters share their names with table columns. Unqualified names
-- resolve to the parameters inside a stored program, so the column side of each
-- comparison must stay qualified with the table alias.
-- READS SQL DATA is declared so the function can be created when binary logging
-- is enabled (ERROR 1418 otherwise).
CREATE FUNCTION RiskValue(likelihood int, consequence int, riskvalue int) RETURNS int
READS SQL DATA
BEGIN
    SELECT m.Level * 100
    INTO   riskvalue
    FROM   riskmatrixthresholds AS m
    WHERE  m.Likelihood  = likelihood
      AND  m.Consequence = consequence;

    RETURN riskvalue;
END;

http://www.sqlfiddle.com/#!9/1d52a0/17

【问题讨论】:

    标签: mysql join subquery query-optimization stored-functions


    【解决方案1】:

    注意:SQL 是一种声明式语言,而不是过程式语言。你告诉它你想要什么,而不是如何得到它。而您对函数等的使用方式是过程式的。

    如何让这个应用程序更快?

    首先,使用最新版本的 MySQL(8+ 或 MariaDB 10.4+)。更高版本变得更快。

    其次,您已经声明了使用“由函数组成的子查询”的要求。这意味着您可能对性能无能为力。

    为什么不呢?隐藏在函数中的子查询就是所谓的依赖子查询。那些表现不佳。而且因为它们被埋没了,所以 MySQL 的查询规划器无法做任何有用的事情来优化它们。

    重构您的查询以避免使用具有SELECT 操作的函数将使查询规划器能够查看您的整体查询。这将给它一个优化事物的机会。您可以将它们替换为视图。

    另外,不要使用 FROM tablea, tableb 这种逗号连接语法,它自 1992 年起就已经过时了。请改用 FROM tablea JOIN tableb ON tablea.joincolumn = tableb.joincolumn。

    我会为您提供更多建议,但我无法弄清楚您的意图。

    【讨论】:

    • 感谢您的有用建议。为了澄清我想要的结果:我希望保持表结构不变,同时能够比较两个表——风险表(包含起始水平:可能性、技术、进度、成本)和事件表(与风险相关联,每个风险最多有 5 个事件)。我查找状态为“完成”且具有实际日期的最大事件(即最新事件的可能性、技术、进度、成本都存在)。我想同时显示起始水平和结束水平,其中技术、进度、成本通过 GREATEST() 函数合并为后果(Consequence)。
    【解决方案2】:

    在 CurrentLikelihood() 和 CurrentConsequence() 中应用以下更改后,查询总执行时间减少到:执行 0.070 秒,总计 0.082 秒。

    旧的 CurrentLikelihood 查询(速度慢且输出不正确)

    -- Old body of CurrentLikelihood(): takes actuallikelihood from the riskevents
    -- row whose id equals CurrentRiskEventID(...), else the likelihood on the risk row.
    -- NOTE(review): this compares the table's id column with the value returned by
    -- CurrentRiskEventID(); the replacement below matches on eventid and riskid instead.
    SELECT coalesce( 
    (SELECT actuallikelihood
    FROM riskevents
    WHERE id = CurrentRiskEventID(riskidentifier, 0)),
    (SELECT r.likelihood
    FROM risks r
    WHERE r.riskid = riskidentifier)) into currentlikelihood;
    return currentlikelihood;
    

    可正常工作的 CurrentLikelihood 查询

    -- Reads actuallikelihood from the riskevents row matching both the event id
    -- returned by CurrentRiskEventID() and the risk id.
    -- NOTE(review): CurrentRiskEventID() is called with one argument here -
    -- presumably a revised single-parameter version; confirm against its definition.
    SELECT actuallikelihood INTO currentlikelihood 
    FROM riskevents
    WHERE eventid = CurrentRiskEventID(riskidentifier)
    AND riskid = riskidentifier;
    

    旧的 CurrentConsequence 查询(产生缓慢且不正确的输出)

    -- Old body of CurrentConsequence(): GREATEST of the actual technical / schedule /
    -- cost scores from the riskevents row whose id equals CurrentRiskEventID(...),
    -- else GREATEST of the scores on the risk row.
    -- NOTE(review): only actualtechnical and actualschedule are checked for NULL,
    -- and the row is matched on id rather than eventid.
    SELECT coalesce(
           (SELECT GREATEST(actualtechnical, actualschedule, actualcost)
    FROM   riskevents 
    WHERE  id = CurrentRiskEventID(riskidentifier, 0) 
    and    actualtechnical is not null
    and actualschedule is not null),
           (SELECT greatest(technical, schedule, cost)
           from risks 
           Where riskid = riskidentifier)
    ) into currentconsequence;
    

    可正常工作的 CurrentConsequence 查询

    -- Computes GREATEST of the actual technical / schedule / cost scores from the
    -- riskevents row matching both the event id returned by CurrentRiskEventID()
    -- and the risk id.
    -- NOTE(review): CurrentRiskEventID() is called with one argument here -
    -- presumably a revised single-parameter version; confirm against its definition.
    SELECT GREATEST(actualtechnical, actualschedule, actualcost) INTO currentconsequence
    FROM    riskevents
    WHERE   eventid = CurrentRiskEventID(riskidentifier)
    AND riskid = riskidentifier;
    

    旧的 CurrentRiskEventID() 查询

    -- Old body of CurrentRiskEventID(): highest EventID among the risk's events
    -- whose status is not 'Open'.
    select MAX(e.EventID) into currentriskeventid
    FROM riskevents e
    WHERE e.eventstatus not in('Open')
    AND e.riskid = riskidentifier;
    

    修改后的 CurrentRiskEventID() 函数

    -- Highest EventID among the risk's events that are either not 'Open', or are
    -- event 0 while still 'Open' (so EventID 0 always qualifies unless its status is NULL).
    SELECT MAX(e.EventID) INTO currentriskeventid
        FROM riskevents e
        WHERE e.riskid = riskidentifier AND 
            (e.eventstatus != 'Open'
            OR
            (e.EventID = 0 AND e.eventstatus = 'Open'));
    

    【讨论】:

      猜你喜欢
      • 2019-07-09
      • 1970-01-01
      • 2012-03-11
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多