【问题标题】:T-SQL | String "Manipulation" and Aggregation(T-SQL | 字符串“操作”和聚合)
【发布时间】:2014-10-03 16:28:16
【问题描述】:

我有以下场景。

源表 1

-- Source table 1: one row per Div/Dept pair. States carries a comma-separated
-- list of state codes without spaces, or NULL when there is no list.
CREATE TABLE #Table1
(
    Div    varchar(10),
    Dept   varchar(10),
    States varchar(10)
);

-- Seed the sample rows with a single multi-row VALUES constructor.
INSERT INTO #Table1 (Div, Dept, States)
VALUES
    ('Div1', 'Dept1', 'CA,NV,TX'),
    ('Div2', 'Dept2', 'MI,OH,IN'),
    ('Div3', 'Dept2', 'NY,NJ,PA'),
    ('Div4', 'Dept1', NULL);

源表 2

-- Source table 2: several rows per Div/Dept pair. Each States value is a
-- ", "-separated list (comma followed by a space) of state codes.
CREATE TABLE #Table2
(
    Div    varchar(10),
    Dept   varchar(10),
    States varchar(10)
);

-- Seed the sample rows with a single multi-row VALUES constructor.
INSERT INTO #Table2 (Div, Dept, States)
VALUES
    ('Div1', 'Dept1', 'CA'),
    ('Div1', 'Dept1', 'NV, TX'),
    ('Div1', 'Dept1', 'TX, CA'),
    ('Div1', 'Dept1', 'CA, NV'),
    ('Div2', 'Dept2', 'MI, OH'),
    ('Div2', 'Dept2', 'MI, IN'),
    ('Div2', 'Dept2', 'OH'),
    ('Div3', 'Dept2', 'NY, NJ, PA');

期望的输出

-- Desired output: one row per Div/Dept of #Table1, with every state followed
-- by the number of times it occurs, or NULL when there are no states.
CREATE TABLE #Table3
(
    Div    varchar(10),
    Dept   varchar(10),
    States varchar(50)
);

INSERT INTO #Table3 (Div, Dept, States)
VALUES
    ('Div1', 'Dept1', 'CA - (3), NV - (2), TX - (2)'),
    ('Div2', 'Dept2', 'MI - (2), OH - (2), IN - (1)'),
    ('Div3', 'Dept2', 'NY - (1), NJ - (1), PA - (1)'),
    ('Div4', 'Dept1', NULL);

-- Display both source tables and the expected result.
SELECT Div, Dept, States FROM #Table1;
SELECT Div, Dept, States FROM #Table2;
SELECT Div, Dept, States FROM #Table3;

-- Clean up the temporary tables.
DROP TABLE #Table1;
DROP TABLE #Table2;
DROP TABLE #Table3;

SQLFIDDLE

目标:基于 #Table1 和 #Table2,在 Div 和 Dept 字段上连接两个表,然后统计 States 字段中每个州出现的次数,生成包含 Div、Dept 和 States 三列的输出,并在每个州的旁边打印该州的计数。

我不确定如何实现这一点。我正在尝试LIKE,但不能完全弄清楚如何使其动态化。我会继续尝试看看我是否能弄清楚。想我会在这里发布这个问题,看看我是否可以得到一些帮助。

谢谢

更新:

期望的输出

Div     Dept    States
Div1    Dept1   CA - (3), NV - (2), TX - (2)
Div2    Dept2   MI - (2), OH - (2), IN - (1)
Div3    Dept2   NY - (1), NJ - (1), PA - (1)
Div4    Dept1   NULL

【问题讨论】:

  • 你能在问题中添加你想要的输出吗?
  • @ZoffDino 想要的输出是#Table3中的数据集
  • 多个值不应该存储在同一个字段中,如果对数据进行规范化,问题会容易得多。
  • 我猜各州的计数仅基于 Table2 中出现的内容?
  • States 列绝对应该规范化

标签: sql sql-server tsql sql-server-2012


【解决方案1】:

您的要求非常苛刻,但作为开发人员,我们必须使用我们所拥有的。这是一个广泛使用公用表表达式 (CTE) 的解决方案:

-- Counts how often each state code occurs in Table2.States per Div/Dept and
-- renders one "ST - (n), ST - (n)" string for every row of Table1.
-- NOTE(review): this reads tables named Table1/Table2; the setup script in the
-- question creates #Table1/#Table2 - presumably the names must be adjusted
-- when running against that script.
;WITH
    -- CTE1: remove every space and append a trailing comma, so each state code
    -- in the list is terminated by a ','.
    CTE1 AS
    (
        SELECT      Div, Dept,
                    REPLACE(States,' ','') + ',' AS States
        FROM        Table2
    ),
    -- CTE2: recursive split. The anchor peels the first state off the list;
    -- each recursive step peels the next one off RemainingStates and stops
    -- once RemainingStates is empty.
    CTE2 AS
    (
        SELECT      c1.Div, c1.Dept,
                    LEFT(c1.States,CHARINDEX(',', c1.States)-1)                 AS IndividualState,
                    RIGHT(c1.States,LEN(c1.States)-CHARINDEX(',', c1.States))   AS RemainingStates
        FROM        CTE1    c1
        UNION ALL
        SELECT      c2.Div, c2.Dept,
                    LEFT(c2.RemainingStates,CHARINDEX(',', c2.RemainingStates)-1),
                    RIGHT(c2.RemainingStates,LEN(c2.RemainingStates) - CHARINDEX(',', c2.RemainingStates))
        FROM        CTE2    c2
        WHERE       LEN(c2.RemainingStates) > 0
    ),
    -- CTE3: occurrences of each state per Div/Dept. Blank elements (from an
    -- empty list or a doubled/trailing comma) are not states and are skipped.
    CTE3 AS
    (
        SELECT      Div, Dept,
                    IndividualState,
                    COUNT(*)            AS StateCount
        FROM        CTE2
        WHERE       IndividualState <> ''
        GROUP BY    Div, Dept, IndividualState
    ),
    -- CTE4: concatenate "ST - (n), " fragments for every Table1 row.
    -- ORDER BY makes the fragment order deterministic (alphabetical by state);
    -- TYPE + .value() returns the text without XML entity escaping.
    -- Rows with no matching states yield NULL.
    CTE4 AS
    (
        SELECT      t1.Div, t1.Dept,
                    (
                        SELECT  c3.IndividualState + ' - (' + CONVERT(varchar(10),c3.StateCount) + '), '
                        FROM    CTE3 c3
                        WHERE   c3.Div = t1.Div AND c3.Dept = t1.Dept
                        ORDER BY c3.IndividualState
                        FOR XML PATH(''), TYPE
                    ).value('.', 'varchar(max)')       AS States
        FROM        Table1  t1
    )

-- Strip the trailing comma. LEN() ignores trailing spaces, so LEN(States) - 1
-- cuts exactly before the final ','. NULLIF turns an empty string into NULL
-- instead of passing a negative length to LEFT().
SELECT  Div, Dept,
        LEFT(States, NULLIF(LEN(States), 0) - 1) AS States
FROM    CTE4

说明

  1. CTE1清理Table2中的数据:去掉空格,末尾加逗号
  2. CTE2 进行规范化
  3. CTE3 进行计数
  4. CTE4 进行最终组装,把 CA | 3 这样的行拼接成 CA - (3), ... 的形式

最后一个 SELECT 删除尾随的逗号,以获得更整洁的输出。

为了更好地理解每一步,可以将最后的 SELECT 依次替换为 SELECT * FROM CTE1、SELECT * FROM CTE2 等。

【讨论】:

    【解决方案2】:

    好的,首先,您需要拆分 #Table1 和 #Table2 中用分隔符连接在一起的值。有多种方法可以做到这一点,我将使用 Aaron Bertrand 在这篇很棒的博客文章中介绍的数字表(numbers table)方法。所以,我们需要一个数字表,可以这样创建:

    -- Build #Numbers: the integers 1..8000 in column Number.
    -- Row numbers are generated over the cross join of sys.all_objects with
    -- itself, then limited to the first 8000 values.
    SELECT seq.x AS Number
    INTO #Numbers
    FROM
    (
        SELECT ROW_NUMBER() OVER (ORDER BY objs_a.[object_id]) AS x
        FROM sys.all_objects AS objs_a
        CROSS JOIN sys.all_objects AS objs_b
    ) AS seq
    WHERE seq.x >= 1
      AND seq.x <= 8000;
    

    然后,您需要实际进行拆分,再使用分组字符串连接(group concatenation)的方法生成结果:

    -- Splits the state lists of #Table1 and #Table2 with the #Numbers table,
    -- counts per Div/Dept how often each #Table1 state occurs in #Table2, and
    -- returns one row per Div/Dept with the states rendered as "ST - (n),...".
    ;WITH T1 AS
    (
        -- One row per state in #Table1.States (delimiter ','). A Number marks
        -- the start of an item when the character before it is the delimiter.
        -- OUTER APPLY keeps rows whose States is NULL, with Item = NULL.
        SELECT T.Div, T.Dept, N.Item
        FROM #Table1 T
        OUTER APPLY (SELECT Item = SUBSTRING(T.States, Number,
                                             CHARINDEX(',',T.States + ',', Number) -
                                             Number)
                     FROM #Numbers
                     WHERE Number <= CONVERT(INT, LEN(T.States))
                     AND SUBSTRING(',' + T.States, Number, LEN(',')) = ',') N
    ), T2 AS
    (
        -- Same split for #Table2.States, whose delimiter is ', ' (comma + space).
        SELECT T.Div, T.Dept, N.Item
        FROM #Table2 T
        OUTER APPLY (SELECT Item = SUBSTRING(T.States, Number,
                                             CHARINDEX(', ',T.States + ', ', Number) -
                                             Number)
                     FROM #Numbers
                     WHERE Number <= CONVERT(INT, LEN(T.States))
                     AND SUBSTRING(', ' + T.States, Number, LEN(', ')) = ', ') N
    ), T3 AS
    (
        -- COUNT(T2.Item) counts only matched #Table2 rows, so a #Table1 state
        -- that never occurs in #Table2 gets 0 (COUNT(*) would report 1 for the
        -- unmatched LEFT JOIN row).
        SELECT T1.Div, T1.Dept, T1.Item, COUNT(T2.Item) AS Cnt
        FROM T1
        LEFT JOIN T2
            ON T1.Div = T2.Div
            AND T1.Dept = T2.Dept
            AND T1.Item = T2.Item
        GROUP BY T1.Div, T1.Dept, T1.Item
    )
    -- T3 holds one row per state; GROUP BY collapses it to one output row per
    -- Div/Dept (without it every Div/Dept row is repeated once per state).
    -- The inner ORDER BY fixes the order of the concatenated states; a NULL
    -- Item makes the whole fragment NULL, so such a Div/Dept returns NULL.
    SELECT  A.Div,
            A.Dept,
            States = STUFF((SELECT  ',' + CONVERT(VARCHAR(20), B.Item) +
                                    ' - (' + CAST(B.Cnt AS VARCHAR(10)) + ')'
                            FROM T3 B
                            WHERE B.Div = A.Div
                            AND B.Dept = A.Dept
                            ORDER BY B.Item
                        FOR XML PATH(''), TYPE).value('.[1]','nvarchar(max)'),1,1,'')
    FROM T3 A
    GROUP BY A.Div, A.Dept
    ORDER BY A.Div, A.Dept
    

    结果是:

    ╔══════╦═══════╦════════════════════════════╗
    ║ Div  ║ Dept  ║           States           ║
    ╠══════╬═══════╬════════════════════════════╣
    ║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
    ║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
    ║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
    ║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
    ║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
    ║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
    ║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
    ║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
    ║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
    ║ Div4 ║ Dept1 ║ NULL                       ║
    ╚══════╩═══════╩════════════════════════════╝
    

    【讨论】:

    • 感谢@Lamak 的出色帮助。真的很感激。干杯!
    【解决方案3】:

    当然,理想情况下这些数据应该被规范化,因为现在的结构很混乱。既然您只能使用这种结构,我认为最好的办法是使用一张州代码驱动表(driver table),或者使用众多可以拆分分隔值的拆分函数之一,把每个州拆分到单独的一行:

    -- Matches each state abbreviation in #States against the delimited lists
    -- in #Table1 and #Table2, counts the #Table2 occurrences per Div/Dept/state
    -- and returns one row per Div/Dept with the states as "ST-(n),ST-(n)".
    -- NOTE(review): #States is not created in this script; presumably it holds
    -- one state abbreviation per row in column abbrs - confirm before running.
    ;WITH cte AS (-- distinct Div/Dept/state combinations listed in #Table1;
                  -- the LEFT JOIN keeps #Table1 rows with no matching state
                  -- (abbrs is NULL for them)
                  SELECT DISTINCT b.Div,b.Dept,a.abbrs
                    FROM #Table1 b
                    LEFT JOIN #States a
                     ON ','+REPLACE(b.States,' ','')+',' LIKE '%,'+a.abbrs+',%'
                  )
         ,cte2 AS (-- every Div/Dept/state occurrence in #Table2 (duplicates kept
                   -- so that they can be counted)
                   SELECT b.Div,b.Dept,a.abbrs
                    FROM #States a
                    INNER JOIN #Table2 b
                     ON ','+REPLACE(b.States,' ','')+',' LIKE '%,'+a.abbrs+',%'
                  )
         ,cte3 AS (-- number of #Table2 occurrences per Div/Dept/state, as text
                   SELECT a.Div,a.Dept,a.abbrs,CAST(COUNT(b.abbrs)AS VARCHAR(25)) CT
                    FROM  cte a
                    LEFT JOIN cte2 b
                     ON a.Dept = b.Dept
                     AND a.Div = b.Div
                     AND a.abbrs = b.abbrs
                    GROUP BY a.Div,a.Dept,a.abbrs
                  )
    -- cte3 is already unique per Div/Dept/state, so the inner list needs only
    -- an explicit ORDER BY for a deterministic state order. A NULL abbrs makes
    -- the fragment NULL, so such a Div/Dept returns NULL.
    SELECT
          a.Div,a.Dept
          ,STUFF((SELECT ',' + b.abbrs+'-('+b.CT+')'
                                        FROM cte3 b
                                        WHERE a.Div = b.Div
                                          AND a.Dept = b.Dept
                                        ORDER BY b.abbrs
                                        FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)')
                                        ,1,1,'') AS States
    FROM  cte3 a
    GROUP BY a.Div,a.Dept
    ORDER BY a.Div,a.Dept
    

    演示:SQL Fiddle

    注意:

    • cte - 从 table1 创建不重复的 div/dept/state 列表
    • cte2 - 从 table2 创建所有 div/dept/states 的列表
    • cte3 - 在 div/dept/state 上聚合以获取计数

    输出是:

    |  DIV |  DEPT |               STATES |
    |------|-------|----------------------|
    | Div1 | Dept1 | CA-(3),NV-(2),TX-(2) |
    | Div2 | Dept2 | IN-(1),MI-(2),OH-(2) |
    | Div3 | Dept2 | NJ-(1),NY-(1),PA-(1) |
    | Div4 | Dept1 |               (null) |
    

    更新了 fiddle 以包含您的 NULL 行并添加了输出。

    【讨论】:

    • 感谢@Goat CO 的出色帮助。真的很感激。干杯!
    猜你喜欢
    • 2023-04-04
    • 2014-11-05
    • 2021-11-03
    • 1970-01-01
    • 2014-04-18
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多