【问题标题】:Oracle query to get all rows with duplicate set of columns in the same table(Oracle 查询:获取同一表中具有重复列集的所有行)
【发布时间】:2020-10-15 13:49:00
【问题描述】:

我有一个 Oracle 数据库表,其中有一组列重复出现了 5 次。如下面的示例所示,列(COL1_A 到 COL1_E)这一组在同一个表中会出现 5 次;各组之间除列名不同外,对应列的数据类型相同,但在同一组(COL1_A 到 COL1_E)内部,各列的数据类型并不相同。

Cli_Id,Country,Name,COL1_A,COL1_B,COL1_C,COL1_D,COL1_E,COL2_A,COL2_B,COL2_C,COL2_D,COL2_E,COL3_A COL3_B,..
1      Test1   Lo1     1      2       3       4     5      1      2      3      4     5  
2      Test2   Lo2     5      6       7       8     9      5      3      3      4     5
3      Test3   Lo3     10     20      30      40   50      5      3      3      4     5
4      Test4   Lo4     11     22      32      42   52      53     3      3      4     5
5      Test5   Lo5     11     22      32      42   52      11     22      32      42   52

我需要编写一个查询来检索在列集之间包含重复项的所有行。

预期的结果

Cli_Id,Country,Name,COL1_A,COL1_B,COL1_C,COL1_D,COL1_E,COL2_A,COL2_B,COL2_C,COL2_D,COL2_E,COL3_A COL3_B,..
1      Test1   Lo1     1      2       3       4     5      1      2      3      4     5  
5      Test5   Lo5     11     22      32      42   52      11     22      32      42   52

我写了一个包含多个 UNION 的查询,但它可能会影响性能,因为我们打算把列集增加到 8 组。因此我想寻找一种更简单的方法来编写这个查询。

您能否就更简单的查询提出建议,记住它应该是高效的,并且运行时间不会太长。

-- Return the identifying columns of every row in which any two of the five
-- column sets (COLn_A .. COLn_E) match column-for-column.
-- There is one UNION branch per pair of sets; UNION itself removes duplicate
-- rows, so the branches do not need their own DISTINCT.

-- Set 1 vs set 2
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL1_A = COL2_A
   AND COL1_B = COL2_B
   AND COL1_C = COL2_C
   AND COL1_D = COL2_D
   AND COL1_E = COL2_E
UNION
-- Set 1 vs set 3
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL1_A = COL3_A
   AND COL1_B = COL3_B
   AND COL1_C = COL3_C
   AND COL1_D = COL3_D
   AND COL1_E = COL3_E
UNION
-- Set 1 vs set 4
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL1_A = COL4_A
   AND COL1_B = COL4_B
   AND COL1_C = COL4_C
   AND COL1_D = COL4_D
   AND COL1_E = COL4_E
UNION
-- Set 1 vs set 5
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL1_A = COL5_A
   AND COL1_B = COL5_B
   AND COL1_C = COL5_C
   AND COL1_D = COL5_D
   AND COL1_E = COL5_E
UNION
-- Set 2 vs set 3
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL2_A = COL3_A
   AND COL2_B = COL3_B
   AND COL2_C = COL3_C
   AND COL2_D = COL3_D
   AND COL2_E = COL3_E
UNION
-- Set 2 vs set 4
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL2_A = COL4_A
   AND COL2_B = COL4_B
   AND COL2_C = COL4_C
   AND COL2_D = COL4_D
   AND COL2_E = COL4_E
UNION
-- Set 2 vs set 5
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL2_A = COL5_A
   AND COL2_B = COL5_B
   AND COL2_C = COL5_C
   AND COL2_D = COL5_D
   AND COL2_E = COL5_E
UNION
-- Set 3 vs set 4
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL3_A = COL4_A
   AND COL3_B = COL4_B
   AND COL3_C = COL4_C
   AND COL3_D = COL4_D
   AND COL3_E = COL4_E
UNION
-- Set 3 vs set 5
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL3_A = COL5_A
   AND COL3_B = COL5_B
   AND COL3_C = COL5_C
   AND COL3_D = COL5_D
   AND COL3_E = COL5_E
UNION
-- Set 4 vs set 5
SELECT CLi_id, Country, Name
  FROM Table1
 WHERE COL4_A = COL5_A
   AND COL4_B = COL5_B
   AND COL4_C = COL5_C
   AND COL4_D = COL5_D
   AND COL4_E = COL5_E

【问题讨论】:

    标签: sql oracle duplicates oracle-sqldeveloper


    【解决方案1】:

    您可以使用 OR 和 IN,如下所示:

    -- Single statement over Table1: a row qualifies when one column set equals
    -- any higher-numbered column set. Each IN list contains only the sets that
    -- come after the one on its left, so every pair of sets is compared once.
    SELECT DISTINCT
           CLi_id,
           Country,
           Name
      FROM Table1
     WHERE (COL1_A, COL1_B, COL1_C, COL1_D, COL1_E) IN (
               (COL2_A, COL2_B, COL2_C, COL2_D, COL2_E),
               (COL3_A, COL3_B, COL3_C, COL3_D, COL3_E),
               (COL4_A, COL4_B, COL4_C, COL4_D, COL4_E),
               (COL5_A, COL5_B, COL5_C, COL5_D, COL5_E)
           )
        OR (COL2_A, COL2_B, COL2_C, COL2_D, COL2_E) IN (
               (COL3_A, COL3_B, COL3_C, COL3_D, COL3_E),
               (COL4_A, COL4_B, COL4_C, COL4_D, COL4_E),
               (COL5_A, COL5_B, COL5_C, COL5_D, COL5_E)
           )
        OR (COL3_A, COL3_B, COL3_C, COL3_D, COL3_E) IN (
               (COL4_A, COL4_B, COL4_C, COL4_D, COL4_E),
               (COL5_A, COL5_B, COL5_C, COL5_D, COL5_E)
           )
        OR (COL4_A, COL4_B, COL4_C, COL4_D, COL4_E) IN (
               (COL5_A, COL5_B, COL5_C, COL5_D, COL5_E)
           )
    

    【讨论】:

      【解决方案2】:

      由于您要比较的是同一组标识属性(Cli_Id、Country、Name)下各列集的值,因此可以用 UNION ALL 把每个列集转成一行再进行分组,而无需像您的查询那样逐对连接比较这些列。

      以下查询将为您提供行级属性,

      -- For each source row, list every column-set value (col_A .. col_E) that
      -- occurs in more than one of the five column sets, with its occurrence count.
      -- The inner view stacks the five sets as rows under common column names.
      -- Fixes: the source is Table1 (TABLE is an Oracle reserved word, so
      -- "from table" does not parse), and col_E is part of the comparison so that
      -- sets differing only in column E are not reported as duplicates.
      -- NOTE(review): GROUP BY places NULLs in the same group, so two sets that are
      -- both NULL in a column count as matching here, unlike an "=" comparison.
      select Cli_Id, Country, Name, col_A, col_B, col_C, col_D, col_E, count(*)
      from (
        select Cli_Id, Country, Name,
               COL1_A as col_A, COL1_B as col_B, COL1_C as col_C, COL1_D as col_D, COL1_E as col_E
        from Table1
        union all
        select Cli_Id, Country, Name,
               COL2_A as col_A, COL2_B as col_B, COL2_C as col_C, COL2_D as col_D, COL2_E as col_E
        from Table1
        union all
        select Cli_Id, Country, Name,
               COL3_A as col_A, COL3_B as col_B, COL3_C as col_C, COL3_D as col_D, COL3_E as col_E
        from Table1
        union all
        select Cli_Id, Country, Name,
               COL4_A as col_A, COL4_B as col_B, COL4_C as col_C, COL4_D as col_D, COL4_E as col_E
        from Table1
        union all
        select Cli_Id, Country, Name,
               COL5_A as col_A, COL5_B as col_B, COL5_C as col_C, COL5_D as col_D, COL5_E as col_E
        from Table1
      )
      group by Cli_Id, Country, Name, col_A, col_B, col_C, col_D, col_E
      having count(*) > 1;
      

      以下查询将为您提供列级属性,

      -- Return the full Table1 rows that contain at least two identical column sets.
      -- t2 stacks the five column sets as rows, then keeps the (row, set value)
      -- groups that occur more than once; joining back on Cli_Id fetches the
      -- complete rows. DISTINCT collapses rows matched by more than one group.
      -- Fixes: the inner UNION ALL view is now closed before GROUP BY (the
      -- parenthesis was missing), the source is Table1 (TABLE is an Oracle
      -- reserved word), and col_E takes part in the comparison.
      -- NOTE(review): the join uses Cli_Id only — assumes Cli_Id identifies a row;
      -- confirm against the table definition.
      select distinct t1.*
      from Table1 t1
      inner join (
        select Cli_Id, Country, Name, col_A, col_B, col_C, col_D, col_E
        from (
          select Cli_Id, Country, Name,
                 COL1_A as col_A, COL1_B as col_B, COL1_C as col_C, COL1_D as col_D, COL1_E as col_E
          from Table1
          union all
          select Cli_Id, Country, Name,
                 COL2_A as col_A, COL2_B as col_B, COL2_C as col_C, COL2_D as col_D, COL2_E as col_E
          from Table1
          union all
          select Cli_Id, Country, Name,
                 COL3_A as col_A, COL3_B as col_B, COL3_C as col_C, COL3_D as col_D, COL3_E as col_E
          from Table1
          union all
          select Cli_Id, Country, Name,
                 COL4_A as col_A, COL4_B as col_B, COL4_C as col_C, COL4_D as col_D, COL4_E as col_E
          from Table1
          union all
          select Cli_Id, Country, Name,
                 COL5_A as col_A, COL5_B as col_B, COL5_C as col_C, COL5_D as col_D, COL5_E as col_E
          from Table1
        )
        group by Cli_Id, Country, Name, col_A, col_B, col_C, col_D, col_E
        having count(*) > 1
      ) t2
        on (t1.Cli_Id = t2.Cli_Id);
      

      【讨论】:

      • 很抱歉,但不知道为什么我会收到缺少左括号的错误
      • 请立即尝试。编辑查询
      【解决方案3】:

      对数据做 Unpivot,按列和值分组,然后取 min(count) 等于列集数量的那些 id(您的示例中为 3;对当前数据请改为 5,将来改为 8):

      demo

      -- Rows of table1 in which all three unpivoted column sets are identical.
      WITH value_counts AS (
        -- One row per (cli_id, column letter, value), with the number of column
        -- sets that hold that value in that letter position.
        SELECT cli_id, col, val, COUNT(*) AS cnt
          FROM table1
          UNPIVOT (val FOR col IN (
            COL1_A AS 'a', COL1_B AS 'b', COL1_C AS 'c', COL1_D AS 'd', COL1_E AS 'e',
            COL2_A AS 'a', COL2_B AS 'b', COL2_C AS 'c', COL2_D AS 'd', COL2_E AS 'e',
            COL3_A AS 'a', COL3_B AS 'b', COL3_C AS 'c', COL3_D AS 'd', COL3_E AS 'e'
          ))
         GROUP BY cli_id, col, val
      ),
      matching_ids AS (
        -- Keep ids whose smallest count equals the number of sets (3): every
        -- (letter, value) group then spans all three sets.
        SELECT cli_id
          FROM value_counts
         GROUP BY cli_id
        HAVING MIN(cnt) = 3
      )
      SELECT *
        FROM table1
       WHERE cli_id IN (SELECT cli_id FROM matching_ids)
      

      编辑:

      当至少有 2 组列相同时,我会认为某一行存在重复

      所以我们必须加入未透视的数据:

      dbfiddle

      -- Identify rows in which at least two column sets are identical.
      -- set_values holds one row per (source row, set number, column letter) with
      -- the value found there.
      WITH set_values AS (
        SELECT cli_id, country, name, ltr, num, val
          FROM table1
          UNPIVOT (val FOR (num, ltr) IN (
            COL1_A AS (1, 'a'), COL1_B AS (1, 'b'), COL1_C AS (1, 'c'),
            COL1_D AS (1, 'd'), COL1_E AS (1, 'e'),
            COL2_A AS (2, 'a'), COL2_B AS (2, 'b'), COL2_C AS (2, 'c'),
            COL2_D AS (2, 'd'), COL2_E AS (2, 'e'),
            COL3_A AS (3, 'a'), COL3_B AS (3, 'b'), COL3_C AS (3, 'c'),
            COL3_D AS (3, 'd'), COL3_E AS (3, 'e')
          ))
      )
      -- Self-join on the same source row, column letter and value; the
      -- "num < num" filter keeps each pair of sets once. A pair of sets is
      -- identical when all 5 column letters match, hence COUNT(*) = 5.
      SELECT DISTINCT cli_id, country, name
        FROM set_values lower_set
        JOIN set_values higher_set
             USING (cli_id, country, name, ltr, val)
       WHERE lower_set.num < higher_set.num
       GROUP BY cli_id, country, name, lower_set.num, higher_set.num
      HAVING COUNT(*) = 5
      

      【讨论】:

      • 感谢您的回复。但是不,它不符合我的要求。当至少有 2 组相同的列时,我会认为一行有重复
      猜你喜欢
      • 1970-01-01
      • 2020-05-07
      • 2019-09-17
      • 1970-01-01
      • 2019-02-01
      • 2022-06-17
      • 1970-01-01
      • 1970-01-01
      • 2018-12-30
      相关资源
      最近更新 更多