以下是 BigQuery 标准 SQL 的写法。与第一个(非常好的)使用 CROSS JOIN 的答案相比,这种写法有些非正统:CROSS JOIN 对于学生选课这类小规模用例很可能没有问题,但在涉及真正大数据的更通用场景中可能是致命的。因此下面改用 UNION ALL,使中间结果的行数是 N+M 而不是 N×M。
#standardSQL
-- For every student, return the class whose StartTime is closest to the
-- student's DesiredStartTime. Output columns: StartTime, Class, Student.
--
-- Classes and students are merged into one timeline with UNION ALL (N+M rows
-- instead of the NxM rows of a CROSS JOIN); each student row then looks
-- backwards and forwards along the timeline for the nearest class row.
WITH timeline AS (
  -- One row per class and one row per student; is_student tells them apart
  -- without relying on a sentinel value in the Student column.
  SELECT
    StartTime AS event_time,
    Class,
    CAST(NULL AS STRING) AS Student,
    FALSE AS is_student
  FROM `project.dataset.class`
  UNION ALL
  SELECT
    DesiredStartTime,
    NULL,
    Student,
    TRUE
  FROM `project.dataset.student`
),
tagged AS (
  -- class_info is NULL on student rows, so IGNORE NULLS below skips over
  -- neighbouring students and always lands on a real class row.
  SELECT
    UNIX_SECONDS(event_time) AS ts,
    Student,
    is_student,
    IF(
      is_student,
      NULL,
      STRUCT(
        UNIX_SECONDS(event_time) AS class_ts,
        event_time AS StartTime,
        Class AS Class
      )
    ) AS class_info
  FROM timeline
),
neighbours AS (
  SELECT
    ts,
    Student,
    is_student,
    LAST_VALUE(class_info IGNORE NULLS) OVER(prev_win) AS prev_class,
    FIRST_VALUE(class_info IGNORE NULLS) OVER(next_win) AS next_class
  FROM tagged
  WINDOW
    -- is_student as a tie-breaker puts a class before a student that shares
    -- its timestamp, which keeps the two windows consistent with each other.
    prev_win AS (ORDER BY ts, is_student ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
    next_win AS (ORDER BY ts, is_student ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
)
SELECT
  nearest.StartTime AS StartTime,
  nearest.Class AS Class,
  Student
FROM (
  SELECT
    Student,
    CASE
      -- No later class exists: fall back to the previous one.
      WHEN next_class IS NULL THEN prev_class
      -- Strictly closer to the previous class. A NULL prev_class makes this
      -- comparison NULL, so the ELSE branch (next class) is taken.
      WHEN ts - prev_class.class_ts < next_class.class_ts - ts THEN prev_class
      -- Equal distance or closer to the next class.
      ELSE next_class
    END AS nearest
  FROM neighbours
  WHERE is_student
)
您可以使用问题中的示例数据来测试和体验上述查询,如下所示:
#standardSQL
-- Self-contained demo: for every student, return the class whose StartTime is
-- closest to the student's DesiredStartTime. The first two CTEs are inline
-- sample data standing in for the real tables.
-- Output columns: StartTime, Class, Student.
WITH `project.dataset.class` AS (
  SELECT TIMESTAMP '2019-07-01 08:00:00' StartTime, 'English' Class UNION ALL
  SELECT '2019-07-01 09:00:00', 'Chemistry' UNION ALL
  SELECT '2019-07-01 10:30:00', 'Math'
), `project.dataset.student` AS (
  SELECT TIMESTAMP '2019-07-01 08:45:00' DesiredStartTime, 'Jimmy' Student UNION ALL
  SELECT '2019-07-01 09:15:00', 'Bobby' UNION ALL
  SELECT '2019-07-01 10:00:00', 'Buddy'
),
timeline AS (
  -- One row per class and one row per student (N+M rows, no CROSS JOIN);
  -- is_student tells them apart without a sentinel value in Student.
  SELECT
    StartTime AS event_time,
    Class,
    CAST(NULL AS STRING) AS Student,
    FALSE AS is_student
  FROM `project.dataset.class`
  UNION ALL
  SELECT
    DesiredStartTime,
    NULL,
    Student,
    TRUE
  FROM `project.dataset.student`
),
tagged AS (
  -- class_info is NULL on student rows, so IGNORE NULLS below skips over
  -- neighbouring students and always lands on a real class row.
  SELECT
    UNIX_SECONDS(event_time) AS ts,
    Student,
    is_student,
    IF(
      is_student,
      NULL,
      STRUCT(
        UNIX_SECONDS(event_time) AS class_ts,
        event_time AS StartTime,
        Class AS Class
      )
    ) AS class_info
  FROM timeline
),
neighbours AS (
  SELECT
    ts,
    Student,
    is_student,
    LAST_VALUE(class_info IGNORE NULLS) OVER(prev_win) AS prev_class,
    FIRST_VALUE(class_info IGNORE NULLS) OVER(next_win) AS next_class
  FROM tagged
  WINDOW
    -- is_student as a tie-breaker puts a class before a student that shares
    -- its timestamp, which keeps the two windows consistent with each other.
    prev_win AS (ORDER BY ts, is_student ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
    next_win AS (ORDER BY ts, is_student ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
)
SELECT
  nearest.StartTime AS StartTime,
  nearest.Class AS Class,
  Student
FROM (
  SELECT
    Student,
    CASE
      -- No later class exists: fall back to the previous one.
      WHEN next_class IS NULL THEN prev_class
      -- Strictly closer to the previous class. A NULL prev_class makes this
      -- comparison NULL, so the ELSE branch (next class) is taken.
      WHEN ts - prev_class.class_ts < next_class.class_ts - ts THEN prev_class
      -- Equal distance or closer to the next class.
      ELSE next_class
    END AS nearest
  FROM neighbours
  WHERE is_student
)
结果如下
Row StartTime Class Student
1 2019-07-01 09:00:00 UTC Chemistry Jimmy
2 2019-07-01 09:00:00 UTC Chemistry Bobby
3 2019-07-01 10:30:00 UTC Math Buddy
如果 StartTime 和 DesiredStartTime 是字符串(就像您问题中的示例那样),则显然需要先将它们解析为 TIMESTAMP,如下例所示:
#standardSQL
-- Self-contained demo for string inputs: StartTime / DesiredStartTime arrive
-- as 'MM/DD/YY HH:MM' strings and are parsed to TIMESTAMP once, up front.
-- For every student, return the class whose start time is closest to the
-- student's desired start time. Output columns: StartTime, Class, Student.
WITH `project.dataset.class` AS (
  SELECT '07/01/19 08:00' StartTime, 'English' Class UNION ALL
  SELECT '07/01/19 09:00', 'Chemistry' UNION ALL
  SELECT '07/01/19 10:30', 'Math'
), `project.dataset.student` AS (
  SELECT '07/01/19 08:45' DesiredStartTime, 'Jimmy' Student UNION ALL
  SELECT '07/01/19 09:15', 'Bobby' UNION ALL
  SELECT '07/01/19 10:00', 'Buddy'
),
timeline AS (
  -- One row per class and one row per student (N+M rows, no CROSS JOIN);
  -- is_student tells them apart without a sentinel value in Student.
  -- '%D %R' is shorthand for '%m/%d/%y %H:%M'.
  SELECT
    PARSE_TIMESTAMP('%D %R', StartTime) AS event_time,
    Class,
    CAST(NULL AS STRING) AS Student,
    FALSE AS is_student
  FROM `project.dataset.class`
  UNION ALL
  SELECT
    PARSE_TIMESTAMP('%D %R', DesiredStartTime),
    NULL,
    Student,
    TRUE
  FROM `project.dataset.student`
),
tagged AS (
  -- class_info is NULL on student rows, so IGNORE NULLS below skips over
  -- neighbouring students and always lands on a real class row.
  SELECT
    UNIX_SECONDS(event_time) AS ts,
    Student,
    is_student,
    IF(
      is_student,
      NULL,
      STRUCT(
        UNIX_SECONDS(event_time) AS class_ts,
        event_time AS StartTime,
        Class AS Class
      )
    ) AS class_info
  FROM timeline
),
neighbours AS (
  SELECT
    ts,
    Student,
    is_student,
    LAST_VALUE(class_info IGNORE NULLS) OVER(prev_win) AS prev_class,
    FIRST_VALUE(class_info IGNORE NULLS) OVER(next_win) AS next_class
  FROM tagged
  WINDOW
    -- is_student as a tie-breaker puts a class before a student that shares
    -- its timestamp, which keeps the two windows consistent with each other.
    prev_win AS (ORDER BY ts, is_student ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
    next_win AS (ORDER BY ts, is_student ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
)
SELECT
  nearest.StartTime AS StartTime,
  nearest.Class AS Class,
  Student
FROM (
  SELECT
    Student,
    CASE
      -- No later class exists: fall back to the previous one.
      WHEN next_class IS NULL THEN prev_class
      -- Strictly closer to the previous class. A NULL prev_class makes this
      -- comparison NULL, so the ELSE branch (next class) is taken.
      WHEN ts - prev_class.class_ts < next_class.class_ts - ts THEN prev_class
      -- Equal distance or closer to the next class.
      ELSE next_class
    END AS nearest
  FROM neighbours
  WHERE is_student
)