- 首先,MAP 表必须有一个列可以为映射提供优先级。
- 然后,对该表执行 PIVOT(行转列)。
- 下一步是用 UNION ALL 合并所有源表。
- 最后,将它们全部连接(JOIN)起来,并使用 FIRST_VALUE 函数选出所需的值。
- 得到这样的结果后,就可以将其代入 MERGE 语句中使用。
用于测试的结构和样本数据:
-- Three source tables with an identical layout. Each one is a candidate
-- provider for every attribute (NAME, CATEGORY, HEIGHT) of a given ID.
--
-- HEIGHT is declared with an explicit scale. In Snowflake (the dialect this
-- script appears to target) a bare NUMERIC means NUMBER(38, 0), so fractional
-- sample heights such as 4.4 and 7.2 would be silently rounded to whole
-- numbers on insert. All three tables use the same type so the UNION ALL over
-- them needs no implicit conversion.
CREATE OR REPLACE TABLE SOURCE1 (
    ID       int,
    NAME     string,
    CATEGORY string,
    HEIGHT   numeric(38, 2)
);

CREATE OR REPLACE TABLE SOURCE2 (
    ID       int,
    NAME     string,
    CATEGORY string,
    HEIGHT   numeric(38, 2)
);

CREATE OR REPLACE TABLE SOURCE3 (
    ID       int,
    NAME     string,
    CATEGORY string,
    HEIGHT   numeric(38, 2)
);
-- Priority map: one row per (attribute, source table) pair.
--   PRIORITY      rank of the source for that attribute; the resolution query
--                 orders by it ascending, so the lowest number is tried first
--   SOURCE_COLUMN attribute name ('NAME', 'CATEGORY' or 'HEIGHT' in the sample)
--   SOURCE_TABLE  name of the source table that may supply the attribute
CREATE OR REPLACE TABLE MAP (
    PRIORITY      int,
    SOURCE_COLUMN string,
    SOURCE_TABLE  string
);
-- Sample rows: the same three IDs (1, 2, 3) are loaded into every source
-- table, with differing attribute values and a few NULLs.

-- SOURCE1: every attribute populated, whole-number heights.
INSERT INTO SOURCE1
    (ID, NAME, CATEGORY, HEIGHT)
VALUES
    (1, 'A', 'T1', 4),
    (2, 'B', 'T2', 5),
    (3, 'C', 'T3', 6);

-- SOURCE2: NAME is missing for ID 3.
-- NOTE(review): heights 4.4 and 7.2 are fractional; confirm the HEIGHT column
-- type keeps the fraction.
INSERT INTO SOURCE2
    (ID, NAME, CATEGORY, HEIGHT)
VALUES
    (1, 'A1', 'T1', 4.4),
    (2, 'B1', 'T22', 6),
    (3, NULL, 'T3', 7.2);

-- SOURCE3: HEIGHT is missing for IDs 1 and 3, CATEGORY is missing for ID 2.
INSERT INTO SOURCE3
    (ID, NAME, CATEGORY, HEIGHT)
VALUES
    (1, 'A12', 'T21', NULL),
    (2, 'B', NULL, 6),
    (3, 'C3', 'T3', NULL);
-- Source priority per attribute. The resolution query orders by PRIORITY
-- ascending, so the source with priority 1 is tried first.
INSERT INTO MAP
    (PRIORITY, SOURCE_COLUMN, SOURCE_TABLE)
VALUES
    -- NAME: SOURCE3, then SOURCE1, then SOURCE2
    (1, 'NAME', 'SOURCE3'),
    (2, 'NAME', 'SOURCE1'),
    (3, 'NAME', 'SOURCE2'),
    -- CATEGORY: SOURCE2, then SOURCE3, then SOURCE1
    (1, 'CATEGORY', 'SOURCE2'),
    (2, 'CATEGORY', 'SOURCE3'),
    (3, 'CATEGORY', 'SOURCE1'),
    -- HEIGHT: SOURCE1, then SOURCE2, then SOURCE3
    (1, 'HEIGHT', 'SOURCE1'),
    (2, 'HEIGHT', 'SOURCE2'),
    (3, 'HEIGHT', 'SOURCE3');
以下是我建议的解决方案:
-- Builds one row per ID in which each attribute (NAME, CATEGORY, HEIGHT) is
-- taken from the highest-priority source (lowest MAP.PRIORITY) that holds a
-- non-NULL value for it. LINEAGE records the winning source per attribute as
-- '<name source>-<category source>-<height source>' with 'SOURCE' shortened
-- to 'S'; a segment reads 'NONE' when no mapped source supplied the attribute.
WITH _MAP AS (
    -- One row per PRIORITY; the NAME / CATEGORY / HEIGHT columns hold the
    -- source table ranked at that priority for the attribute. The alias column
    -- list names the pivot output columns. With a single MAP row per
    -- (PRIORITY, SOURCE_COLUMN), as in the sample data, MAX() just passes that
    -- one table name through.
    SELECT *
    FROM MAP
    PIVOT (MAX(SOURCE_TABLE) FOR SOURCE_COLUMN IN ('NAME', 'CATEGORY', 'HEIGHT'))
        AS p (PRIORITY, NAME, CATEGORY, HEIGHT)
), _SRC AS (
    -- All source rows stacked, each tagged with the table it came from.
    SELECT 'SOURCE1' AS SOURCE_TABLE, ID, NAME, CATEGORY, HEIGHT FROM SOURCE1
    UNION ALL
    SELECT 'SOURCE2' AS SOURCE_TABLE, ID, NAME, CATEGORY, HEIGHT FROM SOURCE2
    UNION ALL
    SELECT 'SOURCE3' AS SOURCE_TABLE, ID, NAME, CATEGORY, HEIGHT FROM SOURCE3
)
-- Every window expression is constant within an ID, so DISTINCT collapses the
-- stacked source rows to one row per ID.
SELECT DISTINCT
    _SRC.ID,
    -- A row only gets a priority when its source is mapped for the attribute
    -- AND its value is non-NULL (see the join conditions). NULLS LAST keeps the
    -- remaining rows behind every real candidate regardless of the default
    -- NULL ordering; the CASE guard stops such a row from supplying a value
    -- when no real candidate exists.
    FIRST_VALUE(CASE WHEN MN.PRIORITY IS NOT NULL THEN _SRC.NAME END)
        OVER (PARTITION BY _SRC.ID ORDER BY MN.PRIORITY NULLS LAST) AS NAME,
    FIRST_VALUE(CASE WHEN MC.PRIORITY IS NOT NULL THEN _SRC.CATEGORY END)
        OVER (PARTITION BY _SRC.ID ORDER BY MC.PRIORITY NULLS LAST) AS CATEGORY,
    FIRST_VALUE(CASE WHEN MH.PRIORITY IS NOT NULL THEN _SRC.HEIGHT END)
        OVER (PARTITION BY _SRC.ID ORDER BY MH.PRIORITY NULLS LAST) AS HEIGHT,
    -- Same ordering and guard as the values above, so each lineage segment
    -- always names the source the value was actually taken from. COALESCE keeps
    -- one missing segment from turning the whole concatenation into NULL.
    REPLACE(
        COALESCE(
            FIRST_VALUE(CASE WHEN MN.PRIORITY IS NOT NULL THEN _SRC.SOURCE_TABLE END)
                OVER (PARTITION BY _SRC.ID ORDER BY MN.PRIORITY NULLS LAST),
            'NONE')
        || '-' ||
        COALESCE(
            FIRST_VALUE(CASE WHEN MC.PRIORITY IS NOT NULL THEN _SRC.SOURCE_TABLE END)
                OVER (PARTITION BY _SRC.ID ORDER BY MC.PRIORITY NULLS LAST),
            'NONE')
        || '-' ||
        COALESCE(
            FIRST_VALUE(CASE WHEN MH.PRIORITY IS NOT NULL THEN _SRC.SOURCE_TABLE END)
                OVER (PARTITION BY _SRC.ID ORDER BY MH.PRIORITY NULLS LAST),
            'NONE'),
        'SOURCE', 'S') AS LINEAGE
FROM _SRC
-- One join per attribute: attach the priority of this row's source for that
-- attribute, but only when the row really has a value for it.
LEFT JOIN _MAP AS MN
    ON _SRC.SOURCE_TABLE = MN.NAME AND _SRC.NAME IS NOT NULL
LEFT JOIN _MAP AS MC
    ON _SRC.SOURCE_TABLE = MC.CATEGORY AND _SRC.CATEGORY IS NOT NULL
LEFT JOIN _MAP AS MH
    ON _SRC.SOURCE_TABLE = MH.HEIGHT AND _SRC.HEIGHT IS NOT NULL
-- Explicit ordering so the output row order is reproducible.
ORDER BY _SRC.ID;
结果:
+----+------+----------+--------+----------+
| ID | NAME | CATEGORY | HEIGHT | LINEAGE |
+----+------+----------+--------+----------+
| 1 | A12 | T1 | 4 | S3-S2-S1 |
| 2 | B | T22 | 5 | S3-S2-S1 |
| 3 | C3 | T3 | 6 | S3-S2-S1 |
+----+------+----------+--------+----------+