以下是使用 BigQuery 标准 SQL 的写法:
#standardSQL
-- Builds a place-by-place matrix from the (id, place) rows of the source table.
--   * Off-diagonal cell (X, Y): number of distinct ids that have both X and Y.
--   * Diagonal cell (X, X): number of ids that have exactly one row, for place X.
-- The pivoted columns are hard-coded to the places 'A', 'B', 'C' and 'D'.
WITH self AS (
  -- ids with exactly one row, counted per place (feeds the diagonal)
  SELECT
    arr[OFFSET(0)] AS place,
    COUNT(1) AS cnt
  FROM (
    SELECT
      ARRAY_AGG(place) AS arr,
      id
    FROM `project.dataset.table`
    GROUP BY id
    HAVING ARRAY_LENGTH(arr) = 1
  )
  GROUP BY place
), pairs AS (
  -- one row per id carrying every place recorded for that id
  SELECT
    id,
    ARRAY_AGG(place) AS arr
  FROM `project.dataset.table`
  GROUP BY id
), flat_matrix AS (
  -- expand each id's places into ordered pairs of different places
  SELECT
    place1,
    place2,
    COUNT(DISTINCT id) AS cnt
  FROM pairs
  CROSS JOIN UNNEST(arr) AS place1
  CROSS JOIN UNNEST(arr) AS place2
  WHERE place1 <> place2
  GROUP BY place1, place2
  UNION ALL
  -- diagonal entries: place paired with itself
  SELECT place, place, cnt
  FROM self
)
-- Pivot: one output row per place1, one column per known place2 value;
-- cells with no matching (place1, place2) entry default to 0.
SELECT
  place1 AS place,
  MAX(IF(place2 = 'A', cnt, 0)) AS A,
  MAX(IF(place2 = 'B', cnt, 0)) AS B,
  MAX(IF(place2 = 'C', cnt, 0)) AS C,
  MAX(IF(place2 = 'D', cnt, 0)) AS D
FROM flat_matrix
-- required: place1 is selected next to aggregates, so it must be grouped
GROUP BY place1
您可以使用问题中的示例数据来测试上面的查询,如下所示:
#standardSQL
-- Self-contained test of the place-by-place matrix query: the first CTE
-- shadows `project.dataset.table` with inline dummy (id, place) rows.
--   * Off-diagonal cell (X, Y): number of distinct ids that have both X and Y.
--   * Diagonal cell (X, X): number of ids that have exactly one row, for place X.
-- The pivoted columns are hard-coded to the places 'A', 'B', 'C' and 'D'.
WITH `project.dataset.table` AS (
  SELECT 1 AS id, 'A' AS place UNION ALL
  SELECT 2, 'B' UNION ALL
  SELECT 1, 'C' UNION ALL
  SELECT 6, 'B' UNION ALL
  SELECT 4, 'D' UNION ALL
  SELECT 5, 'A' UNION ALL
  SELECT 6, 'C' UNION ALL
  SELECT 7, 'A' UNION ALL
  SELECT 8, 'A' UNION ALL
  SELECT 8, 'C'
), self AS (
  -- ids with exactly one row, counted per place (feeds the diagonal)
  SELECT
    arr[OFFSET(0)] AS place,
    COUNT(1) AS cnt
  FROM (
    SELECT
      ARRAY_AGG(place) AS arr,
      id
    FROM `project.dataset.table`
    GROUP BY id
    HAVING ARRAY_LENGTH(arr) = 1
  )
  GROUP BY place
), pairs AS (
  -- one row per id carrying every place recorded for that id
  SELECT
    id,
    ARRAY_AGG(place) AS arr
  FROM `project.dataset.table`
  GROUP BY id
), flat_matrix AS (
  -- expand each id's places into ordered pairs of different places
  SELECT
    place1,
    place2,
    COUNT(DISTINCT id) AS cnt
  FROM pairs
  CROSS JOIN UNNEST(arr) AS place1
  CROSS JOIN UNNEST(arr) AS place2
  WHERE place1 <> place2
  GROUP BY place1, place2
  UNION ALL
  -- diagonal entries: place paired with itself
  SELECT place, place, cnt
  FROM self
)
-- Pivot: one output row per place1, one column per known place2 value;
-- cells with no matching (place1, place2) entry default to 0.
SELECT
  place1 AS place,
  MAX(IF(place2 = 'A', cnt, 0)) AS A,
  MAX(IF(place2 = 'B', cnt, 0)) AS B,
  MAX(IF(place2 = 'C', cnt, 0)) AS C,
  MAX(IF(place2 = 'D', cnt, 0)) AS D
FROM flat_matrix
GROUP BY place1
-- sort so the row order is deterministic
ORDER BY place
结果为
Row place A B C D
1 A 2 0 2 0
2 B 0 1 1 0
3 C 2 1 0 0
4 D 0 0 0 1