【发布时间】:2020-02-21 05:53:54
【问题描述】:
我正在处理每月的小部件库存,偶尔会丢失几个月的数据。我想做一个“差距和岛屿”类型分析,但我无法让实现正常工作(参见 sqlfiddle)。我正在尝试创建新列,专门列出差距和岛屿开始和停止日期:
http://www.sqlfiddle.com/#!18/a212a/2
任何帮助将不胜感激
【问题讨论】:
标签: sql sql-server gaps-and-islands
我正在处理每月的小部件库存,偶尔会丢失几个月的数据。我想做一个“差距和岛屿”类型分析,但我无法让实现正常工作(参见 sqlfiddle)。我正在尝试创建新列,专门列出差距和岛屿开始和停止日期:
http://www.sqlfiddle.com/#!18/a212a/2
任何帮助将不胜感激
【问题讨论】:
标签: sql sql-server gaps-and-islands
我认为您需要以下内容。SQL fiddle
-- Islands of consecutive months per ID (Oracle syntax).
-- Output: one row per island with the month number of its first month
-- (start_range) and of its last month (end_range).
--
-- Months are compared as real dates (first day of the month, shifted with
-- ADD_MONTHS) instead of "month number +/- 1 within the same year", so an
-- island that runs from December into January is no longer split at the
-- year boundary.
WITH Months AS
 -- One row per (ID, calendar month): several readings in the same month must
 -- not create extra start/end points, or the rn pairing below would shift.
 (SELECT DISTINCT ID, TRUNC(OCCURRANCE, 'MM') AS month_start
    FROM dates_test),
StartingPoints AS
 -- A month starts an island when the same ID has no row in the previous month.
 (SELECT ID,
         month_start,
         ROW_NUMBER() OVER(PARTITION BY ID ORDER BY month_start) AS rn
    FROM Months A
   WHERE NOT EXISTS (SELECT *
            FROM Months B
           WHERE B.ID = A.ID
             AND B.month_start = ADD_MONTHS(A.month_start, -1))),
EndingPoints AS
 -- A month ends an island when the same ID has no row in the following month.
 (SELECT ID,
         month_start,
         ROW_NUMBER() OVER(PARTITION BY ID ORDER BY month_start) AS rn
    FROM Months A
   WHERE NOT EXISTS (SELECT *
            FROM Months B
           WHERE B.ID = A.ID
             AND B.month_start = ADD_MONTHS(A.month_start, 1)))
-- Every island has exactly one start and one end, so the n-th start of an ID
-- pairs with the n-th end of that ID.
SELECT S.ID,
       EXTRACT(month FROM S.month_start) AS start_range,
       EXTRACT(month FROM E.month_start) AS end_range
  FROM StartingPoints S
  JOIN EndingPoints E
    ON E.ID = S.ID
   AND E.rn = S.rn
 ORDER BY S.ID, S.rn;
您可以用下面的查询找到 start_gap 和 end_gap:
-- Extends the island query to report runs of candidate "missing" months.
-- The final SELECT returns, per ID: the start/end month numbers of a
-- start/end pair (start_range, end_range), the first and last missing_month
-- of a run (start_gap_range, end_gap_range) and year_of_occr.
-- Oracle-specific constructs used: DUAL, CONNECT BY LEVEL, DECODE.
WITH StartingPoints AS
-- Rows of dates_test for which the same ID has no row whose month number is
-- one less within the same calendar year; numbered globally by (id, OCCURRANCE).
(SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY id, OCCURRANCE) AS rn
FROM dates_test A
WHERE NOT EXISTS (SELECT *
FROM dates_test B
WHERE B.ID = A.ID
AND EXTRACT(month FROM B.OCCURRANCE) =
EXTRACT(month FROM A.OCCURRANCE) - 1
and EXTRACT(year FROM B.OCCURRANCE) =
EXTRACT(year FROM A.OCCURRANCE))),
EndingPoints AS
-- Mirror image of StartingPoints: no row for the same ID whose month number is
-- one greater within the same calendar year.
(SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY id, OCCURRANCE) AS rn
FROM dates_test A
WHERE NOT EXISTS (SELECT *
FROM dates_test B
WHERE B.ID = A.ID
AND EXTRACT(month FROM B.OCCURRANCE) =
EXTRACT(month FROM A.OCCURRANCE) + 1
and EXTRACT(year FROM B.OCCURRANCE) =
EXTRACT(year FROM A.OCCURRANCE))),
MissingPoints AS
-- Despite the name, each row is a start/end pair (joined on ID and rn) reduced
-- to month numbers, plus the year taken from the end point.
(SELECT S.ID,
EXTRACT(month FROM S.OCCURRANCE) AS start_range,
EXTRACT(month FROM E.OCCURRANCE) AS end_range,
EXTRACT(YEAR FROM E.OCCURRANCE) YEAR_of_OCCR
FROM StartingPoints S
JOIN EndingPoints E
ON E.ID = S.ID
AND E.rn = S.rn),
i1 as
-- Row generator: the integers 1..12 in column num.
(select level num from dual connect by level <= 12),
ms11 as
-- Adds two look-ahead values to every pair:
--   am_i_ms   = start_range of the next row (0 when there is none)
--   miss_year = year_of_occr of the next row (0 when there is none)
-- NOTE(review): neither window has PARTITION BY id and the two windows use
-- different ORDER BY lists, so "next row" may belong to another ID and the two
-- LEADs are not guaranteed to look at the same row - confirm this is intended.
(select ID,
start_range,
end_range,
lead(start_range, 1, 0) OVER(ORDER BY id, year_of_occr, start_range, end_range) as am_i_ms,
lead(year_of_occr, 1, 0) OVER(ORDER BY id, year_of_occr) as miss_year,
year_of_occr
from MissingPoints),
miss_month1 as
-- Cross join of every pair with 1..12 (comma join, no join condition) to
-- produce candidate months after end_range: missing_month = end_range + num,
-- with the values 13 and 14 mapped to 0 by the DECODE.
-- A candidate is kept when it is below the next row's start_range (or the pair
-- is a single month) and end_range + num <= 14, or when the next row is in a
-- different year, am_i_ms is within 0..12 and end_range + num <= 14.
(select id,
start_range,
end_range,
DECODE(end_range + num, 13, 0, 14, 0, end_range + num) missing_month,
year_of_occr
from ms11, i1
where ((end_range + num < am_i_ms or sTART_Range = end_range) and
end_range + num <= 14) or (year_of_occr<> miss_year and am_i_ms >=0 and am_i_ms <=12 and end_range + num <= 14)
order by year_of_occr, missing_month),
miss_month as
-- Drops candidates that coincide with a single-month pair (start_range =
-- end_range = missing_month) of the same ID and year, and keeps only
-- candidates greater than end_range (an end_range of 12 is treated as -1).
(select *
from miss_month1 A
where not exists
(select 1
from miss_month1 B
where A.ID = B.ID
AND (A.missing_month = B.start_range AND
A.missing_month = B.end_range)
and A.year_of_occr = B.year_of_occr)
and decode(end_range, 12, -1, end_range) < missing_month),
StartingmisPoints AS
-- First month of each run of candidates: no candidate with missing_month - 1
-- for the same ID, start_range, end_range and year.
(SELECT A.*,
ROW_NUMBER() OVER(ORDER BY id, year_of_occr, end_range, missing_month) AS rn
FROM miss_month A
WHERE NOT EXISTS (SELECT *
FROM miss_month B
WHERE B.ID = A.ID
and b.start_range = a.start_range
and b.end_range = a.end_range
AND B.missing_month = A.missing_month - 1
and b.year_of_occr = a.year_of_occr)),
EndingmisPoints AS
-- Last month of each run of candidates: no candidate with missing_month + 1
-- for the same ID, start_range, end_range and year.
(SELECT A.*,
ROW_NUMBER() OVER(ORDER BY id, year_of_occr, end_range, missing_month) AS rn
FROM miss_month A
WHERE NOT EXISTS (SELECT *
FROM miss_month B
WHERE B.ID = A.ID
AND B.missing_month = A.missing_month + 1
and b.start_range = a.start_range
and b.end_range = a.end_range
and b.year_of_occr = a.year_of_occr))
-- Pair the n-th run start with the n-th run end (same ID and rn); DISTINCT
-- collapses identical output rows.
SELECT distinct S.ID,
S.start_range,
S.end_range,
S.missing_month start_gap_range,
E.missing_month end_gap_range,
E.year_of_occr
FROM StartingmisPoints S
JOIN EndingmisPoints E
ON E.ID = S.ID
AND E.rn = S.rn
【讨论】:
经过一些修改,我已将您的查询转换为 -
-- Islands of consecutive months per ID (Oracle syntax).
-- Returns one row per island: ID, month number of the first month
-- (start_range) and month number of the last month (end_range).
--
-- Fixes over the previous version:
--   * EndingPoints looked at the PREVIOUS month (- 1), which made it identical
--     to StartingPoints; it must look at the NEXT month.
--   * Months are compared as dates (TRUNC to month + ADD_MONTHS), so rows from
--     different years are never confused and December -> January is contiguous.
--   * ROW_NUMBER is partitioned by ID; a single global numbering by OCCURRANCE
--     has ties between IDs and pairs starts with the wrong ends.
WITH Months AS
(
    -- One row per (ID, calendar month), normalised to the first day of the month.
    SELECT DISTINCT ID, TRUNC(OCCURRANCE, 'MM') AS month_start
    FROM dates_test
),
StartingPoints AS
(
    -- Months with no row for the same ID in the month before.
    SELECT ID, month_start, ROW_NUMBER() OVER(PARTITION BY ID ORDER BY month_start) AS rn
    FROM Months A
    WHERE NOT EXISTS (
        SELECT *
        FROM Months B
        WHERE B.ID = A.ID AND B.month_start = ADD_MONTHS(A.month_start, -1))
),
EndingPoints AS
(
    -- Months with no row for the same ID in the month after.
    SELECT ID, month_start, ROW_NUMBER() OVER(PARTITION BY ID ORDER BY month_start) AS rn
    FROM Months A
    WHERE NOT EXISTS (
        SELECT *
        FROM Months B
        WHERE B.ID = A.ID AND B.month_start = ADD_MONTHS(A.month_start, 1))
)
-- The n-th start of an ID belongs to the same island as its n-th end.
SELECT S.ID, EXTRACT(month FROM S.month_start) AS start_range, EXTRACT(month FROM E.month_start) AS end_range
FROM StartingPoints S
JOIN EndingPoints E ON E.ID = S.ID AND E.rn = S.rn
ORDER BY S.ID, S.rn;
看起来您使用的是 Oracle,而 Oracle 不支持用“AS”关键字声明表别名。如果这不是您期望的结果,请把您的预期结果也一并分享出来。
Demo.
【讨论】:
您可以使用LEAD 解析函数来查找差距:
-- Gaps between consecutive readings of the same id (Oracle syntax).
-- Every reading is reduced to the first day of its month; LEAD fetches the
-- month of the next reading for that id. A gap exists when the next reading is
-- more than one month ahead. It is reported from the first missing month up to
-- one second before the month of the next reading.
WITH month_pairs AS (
    SELECT m.id,
           m.month_start AS date_from,
           LEAD( m.month_start )
               OVER ( PARTITION BY m.id ORDER BY m.month_start ) AS date_to
    FROM   (
               SELECT id,
                      TRUNC( occurrance, 'MM' ) AS month_start
               FROM   dates_test
           ) m
)
SELECT p.id,
       ADD_MONTHS( p.date_from, 1 )      AS missing_from,
       p.date_to - INTERVAL '1' SECOND   AS missing_to
FROM   month_pairs p
-- The last reading of an id has a NULL date_to and is filtered out here.
WHERE  p.date_to > ADD_MONTHS( p.date_from, 1 )
ORDER BY p.id, p.date_from;
所以对于你的测试数据:
-- Sample data: ids 1 and 2 share the same ten reading dates, each with value 10.
-- March, September and October 2014 have no reading for either id.
CREATE TABLE dates_test ( id, value, occurrance ) AS
SELECT ids.id,
       10,
       d.occurrance
FROM   (
           SELECT 1 AS id FROM DUAL UNION ALL
           SELECT 2       FROM DUAL
       ) ids
CROSS JOIN
       (
           SELECT DATE '2014-01-03' AS occurrance FROM DUAL UNION ALL
           SELECT DATE '2014-02-03' FROM DUAL UNION ALL
           SELECT DATE '2014-04-01' FROM DUAL UNION ALL
           SELECT DATE '2014-05-01' FROM DUAL UNION ALL
           SELECT DATE '2014-06-01' FROM DUAL UNION ALL
           SELECT DATE '2014-07-01' FROM DUAL UNION ALL
           SELECT DATE '2014-08-01' FROM DUAL UNION ALL
           SELECT DATE '2014-11-07' FROM DUAL UNION ALL
           SELECT DATE '2014-12-07' FROM DUAL UNION ALL
           SELECT DATE '2015-01-07' FROM DUAL
       ) d
ORDER BY ids.id, d.occurrance;
这个输出:
| ID | MISSING_FROM        | MISSING_TO          |
| -: | :------------------ | :------------------ |
|  1 | 2014-03-01 00:00:00 | 2014-03-31 23:59:59 |
|  1 | 2014-09-01 00:00:00 | 2014-10-31 23:59:59 |
|  2 | 2014-03-01 00:00:00 | 2014-03-31 23:59:59 |
|  2 | 2014-09-01 00:00:00 | 2014-10-31 23:59:59 |
db<>fiddle 在线演示见此处(here)
【讨论】: