【问题标题】:Optimize performance of sub-queries(优化子查询的性能)
【发布时间】:2013-03-19 03:11:37
【问题描述】:

我的初始查询大约需要 40 秒,返回 80,000 多行。我想统计 Windows 7 上的应用安装数量:按站点、按序列、Windows 7 总数,以及不区分操作系统版本的安装总数。

这些子查询可以工作,但它们当然会大大减慢处理速度。运行时间为 3.5 小时。

有没有更有效的方法来做到这一点?

输出:

SoftwareName    Sequence    Site    Win7/site       Win7Installs/seq    TotWin7apps TotalInstalls
Adobe Acrobat       1       BKN         1                   5               626         7854
AutoCAD LT          1       BKN         1                   1               3           15
Adobe Acrobat       1       CTW         4                   5               626         7854
Adobe Captivate     1       CTW         1                   1               8           60

查询:

-- Baseline version from the question: every count is a correlated scalar
-- subquery over the de-duplicated install list, tied to the outer row.
WITH PCapps AS (
    -- One row per distinct (Computer, Sequence, Site, SoftwareName, OS).
    SELECT DISTINCT
        Computer,
        Sequence,
        Site,
        SoftwareName,
        OS
    FROM table
)
SELECT DISTINCT
    pc.SoftwareName,
    pc.Sequence,
    pc.Site,
    -- Windows 7 installs of this software at the same site.
    (
        SELECT COUNT(bySite.SoftwareName)
        FROM PCapps AS bySite
        WHERE bySite.SoftwareName = pc.SoftwareName
          AND bySite.OS = 'Windows 7 Enterprise'
          AND bySite.Site = pc.Site
    ) AS [Win7/site],
    -- Windows 7 installs of this software in the same sequence.
    (
        SELECT COUNT(bySeq.SoftwareName)
        FROM PCapps AS bySeq
        WHERE bySeq.SoftwareName = pc.SoftwareName
          AND bySeq.OS = 'Windows 7 Enterprise'
          AND bySeq.Sequence = pc.Sequence
    ) AS [Win7Installs/seq],
    -- Windows 7 installs of this software anywhere.
    (
        SELECT COUNT(win7.SoftwareName)
        FROM PCapps AS win7
        WHERE win7.SoftwareName = pc.SoftwareName
          AND win7.OS = 'Windows 7 Enterprise'
    ) AS TotWin7apps,
    -- Installs of this software on any OS.
    (
        SELECT COUNT(anyOs.SoftwareName)
        FROM PCapps AS anyOs
        WHERE anyOs.SoftwareName = pc.SoftwareName
    ) AS TotalInstalls
FROM PCapps AS pc

**更新:**

使用@Jason Carter 的提示,我创建了一些#temp 表并加入它们。结果在不到一分钟的时间内运行得非常快(不比初始查询长多少)。此方法与@JasonCarter 的解决方案略有不同,因为我按照他最初的提示创建#temp 表。我创建了几个#temp 表,每个表都包含 COUNT()。

-- Stage 1: de-duplicated install list, one row per distinct
-- (Computer, Sequence, Site, SoftwareName, OS).
SELECT DISTINCT
    Computer,
    Sequence,
    Site,
    SoftwareName,
    OS
INTO #PCapps
FROM TABLE;

-- Stage 2: Windows 7 installs per software and site.
SELECT
    SoftwareName,
    Site,
    COUNT(SoftwareName) AS SiteInstalls
INTO #SiteInstalls
FROM #PCapps
WHERE OS = 'Windows 7 Enterprise'
GROUP BY
    SoftwareName,
    Site;

-- Stage 3: Windows 7 installs per software and sequence.
SELECT
    SoftwareName,
    Sequence,
    COUNT(SoftwareName) AS SeqInstalls
INTO #SeqInstalls
FROM #PCapps
WHERE OS = 'Windows 7 Enterprise'
GROUP BY
    SoftwareName,
    Sequence;

-- Stage 4: Windows 7 installs per software.
SELECT
    SoftwareName,
    COUNT(SoftwareName) AS Win7Installs
INTO #Win7Installs
FROM #PCapps
WHERE OS = 'Windows 7 Enterprise'
GROUP BY
    SoftwareName;

-- Stage 5: installs per software on any OS.
SELECT
    SoftwareName,
    COUNT(SoftwareName) AS AppInstalls
INTO #AppInstalls
FROM #PCapps
GROUP BY
    SoftwareName;

-- Final result: one row per (SoftwareName, Sequence, Site) with the
-- pre-aggregated counts attached.
-- DISTINCT is needed because #PCapps holds one row per computer/OS; without
-- it every combination is repeated once for each of those rows, unlike the
-- SELECT DISTINCT of the subquery version.
-- COALESCE maps the NULL of an unmatched LEFT JOIN (no Windows 7 installs for
-- that combination) to 0, which is what COUNT() returned in the subqueries.
SELECT DISTINCT
    pc.SoftwareName,
    pc.Sequence,
    pc.Site,
    COALESCE(sit7.SiteInstalls, 0) AS SiteInstalls,
    COALESCE(seq7.SeqInstalls, 0)  AS SeqInstalls,
    COALESCE(w7.Win7Installs, 0)   AS Win7Installs,
    COALESCE(ai.AppInstalls, 0)    AS AppInstalls
FROM #PCapps AS pc
LEFT JOIN #SiteInstalls AS sit7
    ON sit7.SoftwareName = pc.SoftwareName
   AND sit7.Site = pc.Site
LEFT JOIN #SeqInstalls AS seq7
    ON seq7.SoftwareName = pc.SoftwareName
   AND seq7.Sequence = pc.Sequence
LEFT JOIN #Win7Installs AS w7
    ON w7.SoftwareName = pc.SoftwareName
LEFT JOIN #AppInstalls AS ai
    ON ai.SoftwareName = pc.SoftwareName;

-- Clean up the session-scoped work tables (reverse order of creation).
DROP TABLE #AppInstalls;
DROP TABLE #Win7Installs;
DROP TABLE #SeqInstalls;
DROP TABLE #SiteInstalls;
DROP TABLE #PCapps;

【问题讨论】:

    标签: sql-server sql-server-2008 subquery query-optimization


    【解决方案1】:

    我会尝试首先将子查询运行到临时表中以收集您的计数,然后提取您的总计数。使用查询设置的方式,它将为每一行 PCapp 运行每个子查询一次,这就是为什么它需要这么长时间。

    试试这样的:

    -- Sample data standing in for the real install inventory.
    DECLARE @t TABLE (Computer varchar(10), Sequence int, Site varchar(3), SoftwareName varchar(20), OS varchar(20));

    INSERT INTO @t (Computer, Sequence, Site, SoftwareName, OS) VALUES
     ('C1',1,'BKN','Adobe Acrobat','Win7')
    ,('C2',1,'BKN','Adobe Acrobat','Win7')
    ,('C3',1,'BKN','Adobe Acrobat','Win7')
    ,('C1',1,'BKN','AutoCAD LT ','Win7')
    ,('C3',1,'BKN','AutoCAD LT ','Win7')
    ,('B1',2,'CTW','Adobe Acrobat','Win7')
    ,('B2',2,'CTW','Adobe Acrobat','Win7')
    ,('B3',3,'CTW','Adobe LT','Win7')
    ,('B4',3,'CTW','Adobe Acrobat','Win7')
    ,('A1',2,'CTW','Adobe Acrobat','Win7')
    ,('A2',2,'CTW','Adobe LT','Win7')
    ,('A3',2,'CTW','Adobe Acrobat','Win7')
    ,('X4',3,'CTW','Adobe Acrobat','WinXP')
    ,('X1',2,'CTW','Adobe Acrobat','WinXP')
    ,('X2',2,'CTW','Adobe LT','WinXP')
    ,('X3',2,'CTW','Adobe Acrobat','WinXP')
    ,('A4',2,'CTW','Adobe Acrobat','Win7');

    -- Make the script re-runnable in the same session: SELECT ... INTO fails
    -- when the temp table is left over from a previous run.
    IF OBJECT_ID('tempdb..#SiteSpecific') IS NOT NULL
        DROP TABLE #SiteSpecific;

    -- Finest-grained counts, computed once and rolled up by the joins below.
    -- Counting over DISTINCT rows keeps the numbers consistent with the
    -- de-duplicated PCapps CTE when the source contains repeated rows.
    SELECT
        installs.Site,
        installs.OS,
        installs.SoftwareName,
        installs.Sequence,
        COUNT(installs.SoftwareName) AS [Count]
    INTO #SiteSpecific
    FROM (
        SELECT DISTINCT
            Computer,
            Sequence,
            Site,
            SoftwareName,
            OS
        FROM @t
    ) AS installs
    GROUP BY
        installs.Site,
        installs.OS,
        installs.SoftwareName,
        installs.Sequence;

    -- One row per (SoftwareName, Sequence, Site) that has at least one Win7
    -- install; combinations without any Win7 row are filtered out by the
    -- WHERE clause and the inner joins.
    WITH PCapps AS (
        SELECT DISTINCT
            Computer,
            Sequence,
            Site,
            SoftwareName,
            OS
        FROM @t
    )
    SELECT DISTINCT
        pc.SoftwareName,
        pc.Sequence,
        pc.Site,
        PerSite.[Count] AS [Win7/site],
        PerSeq.[Count]  AS [Win7Installs/seq],
        PerOS.[Count]   AS TotWin7apps,
        Total.[Count]   AS TotalInstalls
    FROM PCapps AS pc
    -- Installs of the software at the row's site, for the row's OS.
    INNER JOIN (
        SELECT SoftwareName, Site, OS, SUM([Count]) AS [Count]
        FROM #SiteSpecific
        GROUP BY SoftwareName, Site, OS
    ) AS PerSite
        ON PerSite.SoftwareName = pc.SoftwareName
       AND PerSite.Site = pc.Site
       AND PerSite.OS = pc.OS
    -- Installs of the software in the row's sequence, for the row's OS.
    INNER JOIN (
        SELECT SoftwareName, Sequence, OS, SUM([Count]) AS [Count]
        FROM #SiteSpecific
        GROUP BY SoftwareName, Sequence, OS
    ) AS PerSeq
        ON PerSeq.SoftwareName = pc.SoftwareName
       AND PerSeq.Sequence = pc.Sequence
       AND PerSeq.OS = pc.OS
    -- Installs of the software across all sites and sequences, for the row's OS.
    INNER JOIN (
        SELECT SoftwareName, OS, SUM([Count]) AS [Count]
        FROM #SiteSpecific
        GROUP BY SoftwareName, OS
    ) AS PerOS
        ON PerOS.SoftwareName = pc.SoftwareName
       AND PerOS.OS = pc.OS
    -- Installs of the software on any OS.
    INNER JOIN (
        SELECT SoftwareName, SUM([Count]) AS [Count]
        FROM #SiteSpecific
        GROUP BY SoftwareName
    ) AS Total
        ON Total.SoftwareName = pc.SoftwareName
    WHERE pc.OS = 'Win7'
    ORDER BY
        pc.SoftwareName,
        pc.Sequence,
        pc.Site;

    DROP TABLE #SiteSpecific;
    

    【讨论】:

    • 我根据您创建#temp 表的初始评论将我的答案包含在我的问题中。它运行每个子查询一次,然后将它们全部连接在一起以快速显示结果。谢谢!
    【解决方案2】:

    我真的很惊讶...优化器利用并行性使子查询非常高效。我用 120K 记录填充了一个虚拟表并尝试了以下方法。第三个查询只比第一个(你的)稍微高效一点,但代价是更复杂。我会保持原样。可能有更好的解决方案,但您的解决方案对我来说已经足够好了。处理这 80K 行需要多长时间?

    -- Query 1: the question's approach, unchanged in meaning. Each count is a
    -- correlated scalar subquery over the de-duplicated rows of pcapps.
    WITH PCapps2 AS (
        SELECT DISTINCT
            Computer,
            Sequence,
            Site,
            SoftwareName,
            OS
        FROM pcapps
    )
    SELECT DISTINCT
        pc.SoftwareName,
        pc.Sequence,
        pc.Site,
        -- Windows 7 installs of this software at the same site.
        (
            SELECT COUNT(bySite.SoftwareName)
            FROM PCapps2 AS bySite
            WHERE bySite.SoftwareName = pc.SoftwareName
              AND bySite.OS = 'Windows 7 Enterprise'
              AND bySite.Site = pc.Site
        ) AS [Win7/site],
        -- Windows 7 installs of this software in the same sequence.
        (
            SELECT COUNT(bySeq.SoftwareName)
            FROM PCapps2 AS bySeq
            WHERE bySeq.SoftwareName = pc.SoftwareName
              AND bySeq.OS = 'Windows 7 Enterprise'
              AND bySeq.Sequence = pc.Sequence
        ) AS [Win7Installs/seq],
        -- Windows 7 installs of this software anywhere.
        (
            SELECT COUNT(win7.SoftwareName)
            FROM PCapps2 AS win7
            WHERE win7.SoftwareName = pc.SoftwareName
              AND win7.OS = 'Windows 7 Enterprise'
        ) AS TotWin7apps,
        -- Installs of this software on any OS.
        (
            SELECT COUNT(anyOs.SoftwareName)
            FROM PCapps2 AS anyOs
            WHERE anyOs.SoftwareName = pc.SoftwareName
        ) AS TotalInstalls
    FROM PCapps2 AS pc
    
    -- Query 2: same counts via windowed aggregates instead of subqueries.
    ;WITH PCapps2 AS (
        SELECT DISTINCT
            Computer,
            Sequence,
            Site,
            SoftwareName,
            OS
        FROM pcapps
    ), Flagged AS (
        -- Same rows as PCapps2 (no de-duplication here); IsWin7 is 1 for
        -- Windows 7 rows and NULL otherwise, so COUNT(IsWin7) counts only
        -- the Windows 7 rows of a partition.
        SELECT
            SoftwareName,
            Sequence,
            Site,
            CASE WHEN OS = 'Windows 7 Enterprise' THEN 1 END AS IsWin7
        FROM PCapps2
    )
    SELECT DISTINCT
        f.SoftwareName,
        f.Sequence,
        f.Site,
        COUNT(f.IsWin7) OVER (PARTITION BY f.SoftwareName, f.Site)     AS [Win7/site],
        COUNT(f.IsWin7) OVER (PARTITION BY f.SoftwareName, f.Sequence) AS [Win7Installs/seq],
        COUNT(f.IsWin7) OVER (PARTITION BY f.SoftwareName)             AS TotWin7apps,
        COUNT(*)        OVER (PARTITION BY f.SoftwareName)             AS TotalInstalls
    FROM Flagged AS f
    
    -- Query 3: aggregate each count once with GROUP BY, then join the
    -- aggregates back onto the distinct (softwarename, sequence, site) list.
    ;WITH PCapps2 AS (
        SELECT DISTINCT
            Computer,
            Sequence,
            Site,
            SoftwareName,
            OS
        FROM pcapps
    ), win7_per_site AS (
        -- Windows 7 installs per software and site.
        SELECT
            softwarename,
            site,
            COUNT(*) AS [Win7/site]
        FROM PCapps2
        WHERE os = 'Windows 7 Enterprise'
        GROUP BY softwarename, site
    ), win7_per_seq AS (
        -- Windows 7 installs per software and sequence.
        SELECT
            softwarename,
            sequence,
            COUNT(*) AS [Win7Installs/seq]
        FROM PCapps2
        WHERE os = 'Windows 7 Enterprise'
        GROUP BY softwarename, sequence
    ), per_software AS (
        -- Per software: Windows 7 installs and installs on any OS.
        SELECT
            softwarename,
            COUNT(CASE WHEN os = 'Windows 7 Enterprise' THEN 1 END) AS TotWin7apps,
            COUNT(*) AS TotalInstalls
        FROM PCapps2
        GROUP BY softwarename
    ), combos AS (
        -- Output grain: one row per software / sequence / site.
        SELECT DISTINCT
            softwarename,
            sequence,
            site
        FROM PCapps2
    )
    SELECT
        combos.softwarename,
        combos.sequence,
        combos.site,
        -- A missing aggregate row means no matching installs, reported as 0.
        COALESCE(win7_per_site.[Win7/site], 0)       AS [Win7/site],
        COALESCE(win7_per_seq.[Win7Installs/seq], 0) AS [Win7Installs/seq],
        COALESCE(per_software.TotWin7apps, 0)        AS TotWin7apps,
        COALESCE(per_software.TotalInstalls, 0)      AS TotalInstalls
    FROM combos
    LEFT JOIN win7_per_site
        ON combos.softwarename = win7_per_site.softwarename
       AND combos.site = win7_per_site.site
    LEFT JOIN win7_per_seq
        ON combos.softwarename = win7_per_seq.softwarename
       AND combos.sequence = win7_per_seq.sequence
    LEFT JOIN per_software
        ON combos.softwarename = per_software.softwarename
    

    第一个查询: 表 'pcapps'。扫描计数 15,逻辑读取 6630,物理读取 0,预读读取 0,lob 逻辑读取 0,lob 物理读取 0,lob 预读读取 0。 表 'Worktable'。扫描计数 0,逻辑读取 0,物理读取 0,预读读取 0,lob 逻辑读取 0,lob 物理读取 0,lob 预读读取 0。

    第二个查询: 表 'pcapps'。扫描计数 3,逻辑读取 1326,物理读取 0,预读读取 0,lob 逻辑读取 0,lob 物理读取 0,lob 预读读取 0。 表 'Worktable'。扫描计数 18,逻辑读取 1983591,物理读取 0,预读读取 0,lob 逻辑读取 0,lob 物理读取 0,lob 预读读取 0。

    第三个查询: 表 'pcapps'。扫描计数 12,逻辑读取 5304,物理读取 0,预读读取 0,lob 逻辑读取 0,lob 物理读取 0,lob 预读读取 0。 表 'Worktable'。扫描计数 0,逻辑读取 0,物理读取 0,预读读取 0,lob 逻辑读取 0,lob 物理读取 0,lob 预读读取 0。

    【讨论】:

    • 从几个连接表的初始查询在 38 秒内选择了 80,000 条记录。昨天运行了带有子查询的完整查询,耗时 3.5 小时。使用来自@JasonCarter 的提示,查询会在一分钟内运行。
    猜你喜欢
    • 2018-06-21
    • 2021-12-06
    • 2016-12-01
    • 2020-07-05
    • 2013-07-02
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2022-07-26
    相关资源
    最近更新 更多