【问题标题】:How to find missing dates in monthly data series?如何在月度数据系列中查找缺失的日期?
【发布时间】:2020-02-21 05:53:54
【问题描述】:

我正在处理每月的小部件库存,偶尔会丢失几个月的数据。我想做一个“差距和岛屿”类型分析,但我无法让实现正常工作(参见 sqlfiddle)。我正在尝试创建新列,专门列出差距和岛屿开始和停止日期:

http://www.sqlfiddle.com/#!18/a212a/2

任何帮助将不胜感激

【问题讨论】:

    标签: sql sql-server gaps-and-islands


    【解决方案1】:

    我认为您需要以下内容。SQL fiddle

      -- Islands of consecutive months per ID (Oracle syntax).
      --
      -- Returns one row per island: ID, start_range (month number of the first
      -- month of the island) and end_range (month number of its last month).
      --
      -- Adjacency is tested on month-truncated dates with ADD_MONTHS rather than
      -- on EXTRACT(month) +/- 1 within the same year, so December -> January is
      -- recognised as consecutive and an island may wrap across a year boundary
      -- (e.g. start_range = 11, end_range = 1).
      WITH MonthsPresent AS
         -- One row per (ID, calendar month) that has data; DISTINCT keeps several
         -- readings in the same month from producing duplicate start/end points.
         (SELECT DISTINCT ID, TRUNC(OCCURRANCE, 'MM') AS month_start
            FROM dates_test),
        StartingPoints AS
         -- Months with no data in the immediately preceding month: island starts.
         (SELECT ID, month_start,
                 ROW_NUMBER() OVER(PARTITION BY ID ORDER BY month_start) AS rn
            FROM MonthsPresent A
           WHERE NOT EXISTS (SELECT *
                    FROM MonthsPresent B
                   WHERE B.ID = A.ID
                     AND B.month_start = ADD_MONTHS(A.month_start, -1))),
        EndingPoints AS
         -- Months with no data in the immediately following month: island ends.
         (SELECT ID, month_start,
                 ROW_NUMBER() OVER(PARTITION BY ID ORDER BY month_start) AS rn
            FROM MonthsPresent A
           WHERE NOT EXISTS (SELECT *
                    FROM MonthsPresent B
                   WHERE B.ID = A.ID
                     AND B.month_start = ADD_MONTHS(A.month_start, 1)))
        -- Every island has exactly one start and one end, so the n-th start of an
        -- ID pairs with the n-th end of the same ID.
        SELECT S.ID,
               EXTRACT(month FROM S.month_start) AS start_range,
               EXTRACT(month FROM E.month_start) AS end_range
          FROM StartingPoints S
          JOIN EndingPoints E
            ON E.ID = S.ID
           AND E.rn = S.rn
         ORDER BY S.ID, S.month_start;
    

    您可以使用下面的查询找到缺口的起始和结束月份(start_gap_range 和 end_gap_range)

                            -- Gap ranges per ID (Oracle syntax: DUAL, CONNECT BY LEVEL, DECODE).
                            -- Pairs island start/end months per ID, expands candidate months that
                            -- follow each island's end month, then collapses consecutive candidates
                            -- into ranges reported as start_gap_range / end_gap_range.
                            -- NOTE(review): the neighbour checks below compare month +/- 1 within the
                            -- same year only, so a January row never matches a December row of the
                            -- previous year.
                            WITH StartingPoints AS
         -- Rows with no row for the same ID in the previous month number of the
         -- same year; numbered in (id, OCCURRANCE) order.
         (SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY id, OCCURRANCE) AS rn
            FROM dates_test A
           WHERE NOT EXISTS (SELECT *
                    FROM dates_test B
                   WHERE B.ID = A.ID
                     AND EXTRACT(month FROM B.OCCURRANCE) =
                         EXTRACT(month FROM A.OCCURRANCE) - 1
                     and EXTRACT(year FROM B.OCCURRANCE) =
                         EXTRACT(year FROM A.OCCURRANCE))),
        EndingPoints AS
         -- Rows with no row for the same ID in the next month number of the
         -- same year; numbered in (id, OCCURRANCE) order.
         (SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY id, OCCURRANCE) AS rn
            FROM dates_test A
           WHERE NOT EXISTS (SELECT *
                    FROM dates_test B
                   WHERE B.ID = A.ID
                     AND EXTRACT(month FROM B.OCCURRANCE) =
                         EXTRACT(month FROM A.OCCURRANCE) + 1
                     and EXTRACT(year FROM B.OCCURRANCE) =
                         EXTRACT(year FROM A.OCCURRANCE))),
        MissingPoints AS
         -- Pairs the n-th start row with the n-th end row of the same ID and
         -- exposes start month, end month and the year of the end row.
         -- (Despite the name, each row describes a run of present months.)
         (SELECT S.ID,
                 EXTRACT(month FROM S.OCCURRANCE) AS start_range,
                 EXTRACT(month FROM E.OCCURRANCE) AS end_range,
                 EXTRACT(YEAR FROM E.OCCURRANCE) YEAR_of_OCCR
            FROM StartingPoints S
            JOIN EndingPoints E
              ON E.ID = S.ID
             AND E.rn = S.rn),
        i1 as
         -- Row generator: the integers 1..12.
         (select level num from dual connect by level <= 12),
        ms11 as
         -- Adds, for each paired row, the start month (am_i_ms) and the year
         -- (miss_year) of the following row; both default to 0 on the last row.
         -- NOTE(review): neither window has PARTITION BY id, and the two windows
         -- use different ORDER BY lists, so the "following row" can belong to
         -- another ID and may differ between the two columns - confirm intent.
         (select ID,
                 start_range,
                 end_range,
                 lead(start_range, 1, 0) OVER(ORDER BY id, year_of_occr, start_range, end_range) as am_i_ms,
                 lead(year_of_occr, 1, 0) OVER(ORDER BY id, year_of_occr) as miss_year,
                 year_of_occr
            from MissingPoints),
        miss_month1 as
         -- Cross-joins every paired row with 1..12 to build candidate months
         -- end_range + num; the values 13 and 14 are mapped to 0.
         -- A candidate is kept when end_range + num <= 14 and either
         --   (a) it is below the following row's start month, or the row is a
         --       single-month run (start_range = end_range), or
         --   (b) the following row's year differs and am_i_ms is within 0..12.
         -- NOTE(review): the 13/14 -> 0 mapping and the <= 14 bound look like
         -- year-wrap handling - confirm intent.
         (select id,
                 start_range,
                 end_range,
                 DECODE(end_range + num, 13, 0, 14, 0, end_range + num) missing_month,
                 year_of_occr
            from ms11, i1
           where ((end_range + num < am_i_ms or sTART_Range = end_range) and
                 end_range + num <= 14) or (year_of_occr<> miss_year and am_i_ms >=0 and  am_i_ms <=12 and end_range + num <= 14)
           order by year_of_occr, missing_month),
        miss_month as
         -- Drops candidates that coincide with a single-month run
         -- (start_range = end_range = missing_month) of the same ID and year,
         -- and keeps only candidates greater than end_range (an end_range of 12
         -- is treated as -1 for this comparison).
         (select *
            from miss_month1 A
           where not exists
           (select 1
                    from miss_month1 B
                   where A.ID = B.ID
                     AND (A.missing_month = B.start_range AND
                         A.missing_month = B.end_range)
                     and A.year_of_occr = B.year_of_occr)
             and decode(end_range, 12, -1, end_range) < missing_month),
        StartingmisPoints AS
         -- Candidates with no candidate one month earlier for the same
         -- ID, start_range, end_range and year: first month of each gap.
         (SELECT A.*,
                 ROW_NUMBER() OVER(ORDER BY id, year_of_occr, end_range, missing_month) AS rn
            FROM miss_month A
           WHERE NOT EXISTS (SELECT *
                    FROM miss_month B
                   WHERE B.ID = A.ID
                     and b.start_range = a.start_range
                     and b.end_range = a.end_range
                     AND B.missing_month = A.missing_month - 1
                     and b.year_of_occr = a.year_of_occr)),
        EndingmisPoints AS
         -- Candidates with no candidate one month later for the same
         -- ID, start_range, end_range and year: last month of each gap.
         (SELECT A.*,
                 ROW_NUMBER() OVER(ORDER BY id, year_of_occr, end_range, missing_month) AS rn
            FROM miss_month A
           WHERE NOT EXISTS (SELECT *
                    FROM miss_month B
                   WHERE B.ID = A.ID
                     AND B.missing_month = A.missing_month + 1
                     and b.start_range = a.start_range
                     and b.end_range = a.end_range
                     and b.year_of_occr = a.year_of_occr))
        -- Pairs the n-th gap start with the n-th gap end of the same ID.
        SELECT distinct  S.ID,
               S.start_range,
               S.end_range,
               S.missing_month start_gap_range,
               E.missing_month end_gap_range,
               E.year_of_occr
          FROM StartingmisPoints S
          JOIN EndingmisPoints E
            ON E.ID = S.ID
           AND E.rn = S.rn
    

    【讨论】:

    • 这与我正在寻找的非常接近。理想情况下,它将显示孤岛/间隙开始和结束的日期字段(例如,创建 4 个新列:孤岛开始/停止,间隙开始/停止)
    【解决方案2】:

    经过一些修改,我已将您的查询转换为 -

    -- Islands of consecutive months per ID (Oracle syntax).
    --
    -- Returns ID, start_range and end_range (month numbers of the first and
    -- last month of each island).
    --
    -- EndingPoints looks for a missing FOLLOWING month (+1); using -1 there
    -- would make it identical to StartingPoints and every row would report
    -- start_range = end_range.
    -- Adjacency is tested on month-truncated dates with ADD_MONTHS so that
    -- the year is taken into account and December -> January counts as
    -- consecutive; an island may therefore wrap (e.g. 11 -> 1).
    -- rn is numbered per ID so that the join pairs the n-th start of an ID
    -- with the n-th end of the same ID.
    WITH MonthsPresent AS
    (
        -- One row per (ID, calendar month) that has data.
        SELECT DISTINCT ID, TRUNC(OCCURRANCE, 'MM') AS month_start
        FROM dates_test
    ),
    StartingPoints AS
    (
        -- Months whose previous month has no data: island starts.
        SELECT ID, month_start, ROW_NUMBER() OVER(PARTITION BY ID ORDER BY month_start) AS rn
        FROM MonthsPresent A
        WHERE NOT EXISTS (
            SELECT *
            FROM MonthsPresent B
            WHERE B.ID = A.ID AND B.month_start = ADD_MONTHS(A.month_start, -1))
    ),
    EndingPoints AS
    (
        -- Months whose next month has no data: island ends.
        SELECT ID, month_start, ROW_NUMBER() OVER(PARTITION BY ID ORDER BY month_start) AS rn
        FROM MonthsPresent A
        WHERE NOT EXISTS (
            SELECT *
            FROM MonthsPresent B
            WHERE B.ID = A.ID AND B.month_start = ADD_MONTHS(A.month_start, 1))
    )
    SELECT S.ID, EXTRACT(month FROM S.month_start) AS start_range, EXTRACT(month FROM E.month_start) AS end_range
    FROM StartingPoints S
    JOIN EndingPoints E ON E.ID = S.ID AND E.rn = S.rn
    ORDER BY S.ID, S.month_start;
    

    看起来您使用的是 Oracle,而 Oracle 不支持带有“AS”关键字的表别名。如果这不是您的预期结果,请也分享您的预期结果。

    Demo.

    【讨论】:

    • 非常抱歉,oracle 标签是误加的。感谢您尝试解决问题
    【解决方案3】:

    您可以使用LEAD 解析函数来查找差距:

    -- For every id, pair each month that has data with the next month that
    -- has data (LEAD). Whenever the next month is more than one month away,
    -- report the gap: from the first missing month's start up to one second
    -- before the next present month begins.
    WITH month_pairs AS (
      SELECT id,
             TRUNC( occurrance, 'MM' ) AS this_month,
             LEAD( TRUNC( occurrance, 'MM' ) )
               OVER ( PARTITION BY id ORDER BY TRUNC( occurrance, 'MM' ) )
               AS next_month
      FROM   dates_test
    )
    SELECT   id,
             ADD_MONTHS( this_month, 1 ) AS missing_from,
             next_month - INTERVAL '1' SECOND AS missing_to
    FROM     month_pairs
    WHERE    next_month > ADD_MONTHS( this_month, 1 )
    ORDER BY id, this_month;
    

    所以对于你的测试数据:

    -- Sample data: ids 1 and 2 each get the same ten monthly readings
    -- (value 10) from January 2014 to January 2015.
    -- March, September and October 2014 are missing for both ids.
    CREATE TABLE dates_test ( id, value, occurrance ) AS
    SELECT ids.id, 10, months.occurrance
    FROM   ( SELECT 1 AS id FROM DUAL UNION ALL
             SELECT 2       FROM DUAL ) ids
           CROSS JOIN
           ( SELECT DATE '2014-01-03' AS occurrance FROM DUAL UNION ALL
             SELECT DATE '2014-02-03' FROM DUAL UNION ALL
             SELECT DATE '2014-04-01' FROM DUAL UNION ALL
             SELECT DATE '2014-05-01' FROM DUAL UNION ALL
             SELECT DATE '2014-06-01' FROM DUAL UNION ALL
             SELECT DATE '2014-07-01' FROM DUAL UNION ALL
             SELECT DATE '2014-08-01' FROM DUAL UNION ALL
             SELECT DATE '2014-11-07' FROM DUAL UNION ALL
             SELECT DATE '2014-12-07' FROM DUAL UNION ALL
             SELECT DATE '2015-01-07' FROM DUAL ) months;
    

    这个输出:

    ID | MISSING_FROM        | MISSING_TO
    -: | :------------------ | :------------------
     1 | 2014-03-01 00:00:00 | 2014-03-31 23:59:59
     1 | 2014-09-01 00:00:00 | 2014-10-31 23:59:59
     2 | 2014-03-01 00:00:00 | 2014-03-31 23:59:59
     2 | 2014-09-01 00:00:00 | 2014-10-31 23:59:59

    db<>fiddle 演示见此处

    【讨论】:

      猜你喜欢
      • 2016-11-17
      • 2017-01-31
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2015-06-29
      • 2022-01-20
      相关资源
      最近更新 更多