为了得到想要的结果,我会使用数据的关系表示而不是数组。
arrayJoin 帮助将数组转换为关系:
SELECT avg(duration)
FROM
(
SELECT max(time) - min(time) AS duration
FROM
(
SELECT
data.1 AS id,
toDateTime(data.2) AS time
FROM
(
SELECT arrayJoin([('aa', '2020-11-08 01:00:01'), ('aa', '2020-11-08 01:00:03'), ('aa', '2020-11-08 01:00:05'), ('ab', '2020-11-09 01:00:00'), ('ab', '2020-11-09 01:00:05'), ('ab', '2020-11-09 01:00:15')]) AS data
)
)
GROUP BY id
)
/*
┌─avg(duration)─┐
│ 9.5 │
└───────────────┘
*/
基于数组的决策。考虑到它可能比基于关系的方法慢得多(在选择最好的之前检查它们)。这个实现可以通过使用arrayReduceInRanges-function 来改进。
SELECT
arraySort(x -> (x.1), data) AS sorted_array,
arraySplit((x, y) -> y, sorted_array, arrayMap((x, i) -> if(i = 1, 1, if((x.1) = ((sorted_array[i - 1]).1), 0, 1)), sorted_array, arrayEnumerate(sorted_array))) AS session_arrays,
arrayMap(arr -> arrayReduce('min', arrayMap(x -> (x.2), arr)), session_arrays) AS min_session_times,
arrayMap(arr -> arrayReduce('max', arrayMap(x -> (x.2), arr)), session_arrays) AS max_session_times,
arrayReduce('avg', arrayMap((x, y) -> (y - x), min_session_times, max_session_times)) AS avg
FROM
(
SELECT [('aa', toDateTime('2020-11-08 01:00:01')), ('aa', toDateTime('2020-11-08 01:00:03')), ('aa', toDateTime('2020-11-08 01:00:05')), ('ab', toDateTime('2020-11-09 01:00:00')), ('ab', toDateTime('2020-11-09 01:00:05')), ('ab', toDateTime('2020-11-09 01:00:15'))] AS data
)
/*
Row 1:
──────
sorted_array: [('aa','2020-11-08 01:00:01'),('aa','2020-11-08 01:00:03'),('aa','2020-11-08 01:00:05'),('ab','2020-11-09 01:00:00'),('ab','2020-11-09 01:00:05'),('ab','2020-11-09 01:00:15')]
session_arrays: [[('aa','2020-11-08 01:00:01'),('aa','2020-11-08 01:00:03'),('aa','2020-11-08 01:00:05')],[('ab','2020-11-09 01:00:00'),('ab','2020-11-09 01:00:05'),('ab','2020-11-09 01:00:15')]]
min_session_times: ['2020-11-08 01:00:01','2020-11-09 01:00:00']
max_session_times: ['2020-11-08 01:00:05','2020-11-09 01:00:15']
avg: 9.5
*/