【发布时间】:2022-01-08 19:07:58
【问题描述】:
我有一个为 Informix 编写的 SQL 查询
-- Informix-style outer join from the question: table1 rows are always kept,
-- while table2 and table3 are grouped together inside OUTER(...).
SELECT
    cols
FROM
    table1 t1,
    OUTER(table2 t2, table3 t3)
WHERE
    t1.id = t2.id
    AND t2.type = t3.type
我们在 Hive 上有相同的表和数据,我想将其转换为 HiveQL
【问题讨论】:
标签: sql join hive hiveql informix
我有一个为 Informix 编写的 SQL 查询
-- Informix-style outer join from the question: table1 rows are always kept,
-- while table2 and table3 are grouped together inside OUTER(...).
SELECT
    cols
FROM
    table1 t1,
    OUTER(table2 t2, table3 t3)
WHERE
    t1.id = t2.id
    AND t2.type = t3.type
我们在 Hive 上有相同的表和数据,我想将其转换为 HiveQL
【问题讨论】:
标签: sql join hive hiveql informix
使用 LEFT JOIN 代替 OUTER
-- table2 and table3 are inner-joined first inside a derived table; that result
-- is then left-joined to table1 so every table1 row is kept.
SELECT
    cols
FROM table1 t1
LEFT JOIN (
    SELECT
        t2.id AS join_key,
        other_cols
    FROM table2 t2
    INNER JOIN table3 t3
        ON t2.type = t3.type
) s
    ON s.join_key = t1.id
【讨论】:
该答案最初的版本(连续两个 LEFT JOIN)对应的 Informix 写法是:FROM table1 t1, OUTER(table2 t2, OUTER table3 t3)。
Informix 风格的外连接非常有趣、独特,而且完全不符合标准。相比之下,Informix 对 ANSI (ISO) 标准 SQL 外连接的实现应该平淡无奇,其行为与其他 DBMS 相同。
这是一个 SQL 脚本,它创建和填充 3 个表并针对这些表执行 5 个不同的查询。
-- table1: the table whose rows are preserved by the example outer joins.
CREATE TABLE table1
(
    id      SERIAL       NOT NULL,
    data    VARCHAR(32)  NOT NULL,
    PRIMARY KEY (id)
);
-- table2: one row per (id, type) pair.
CREATE TABLE table2
(
    id      INTEGER      NOT NULL,  -- compared with table1.id in the example queries
    type    INTEGER      NOT NULL,  -- compared with table3.type in the example queries
    info    VARCHAR(32)  NOT NULL,
    PRIMARY KEY (id, type)
);
-- table3: lookup of type codes to names, one row per type.
CREATE TABLE table3
(
    type    INTEGER      NOT NULL,
    name    VARCHAR(32)  NOT NULL,
    PRIMARY KEY (type)
);
-- Sample rows for table1. Explicit column lists keep the inserts valid if the
-- table later gains or reorders columns.
-- Ids 102 and 104 have no table2 rows, so they exercise the "no match" case.
INSERT INTO table1 (id, data) VALUES (100, 'Table 1 - ID 100');
INSERT INTO table1 (id, data) VALUES (101, 'Table 1 - ID 101');
INSERT INTO table1 (id, data) VALUES (102, 'Table 1 - ID 102');
INSERT INTO table1 (id, data) VALUES (103, 'Table 1 - ID 103');
INSERT INTO table1 (id, data) VALUES (104, 'Table 1 - ID 104');
-- Sample rows for table2. Explicit column lists keep the inserts valid if the
-- table later gains or reorders columns.
-- Id 107 does not exist in table1 and type 400 does not exist in table3, so
-- these rows exercise the unmatched cases on both join conditions.
INSERT INTO table2 (id, type, info) VALUES (100, 300, 'Table 2 - ID 100, Type 300');
INSERT INTO table2 (id, type, info) VALUES (100, 301, 'Table 2 - ID 100, Type 301');
INSERT INTO table2 (id, type, info) VALUES (100, 302, 'Table 2 - ID 100, Type 302');
INSERT INTO table2 (id, type, info) VALUES (101, 301, 'Table 2 - ID 101, Type 301');
INSERT INTO table2 (id, type, info) VALUES (101, 400, 'Table 2 - ID 101, Type 400');
INSERT INTO table2 (id, type, info) VALUES (101, 302, 'Table 2 - ID 101, Type 302');
INSERT INTO table2 (id, type, info) VALUES (103, 302, 'Table 2 - ID 103, Type 302');
INSERT INTO table2 (id, type, info) VALUES (103, 303, 'Table 2 - ID 103, Type 303');
INSERT INTO table2 (id, type, info) VALUES (103, 300, 'Table 2 - ID 103, Type 300');
INSERT INTO table2 (id, type, info) VALUES (107, 300, 'Table 2 - ID 107, Type 300');
INSERT INTO table2 (id, type, info) VALUES (107, 400, 'Table 2 - ID 107, Type 400');
-- Sample rows for table3. Explicit column lists keep the inserts valid if the
-- table later gains or reorders columns.
-- Types 304 and 305 are not used by any table2 row.
INSERT INTO table3 (type, name) VALUES (300, 'Table 3 - Type 300');
INSERT INTO table3 (type, name) VALUES (301, 'Table 3 - Type 301');
INSERT INTO table3 (type, name) VALUES (302, 'Table 3 - Type 302');
INSERT INTO table3 (type, name) VALUES (303, 'Table 3 - Type 303');
INSERT INTO table3 (type, name) VALUES (304, 'Table 3 - Type 304');
INSERT INTO table3 (type, name) VALUES (305, 'Table 3 - Type 305');
-- Q1: the query from the question (Informix OUTER syntax), with the selected
-- columns spelled out. table2 and table3 sit together in a single OUTER(...).
SELECT
    t1.id   AS t1_id,
    t1.data,
    t2.id   AS t2_id,
    t2.type AS t2_type,
    t2.info,
    t3.type AS t3_type,
    t3.name
FROM
    table1 t1,
    OUTER(table2 t2, table3 t3)
WHERE
    t1.id = t2.id
    AND t2.type = t3.type;
-- Q2: variant of Q1 with an extra OUTER keyword in front of table3, nested
-- inside the OUTER(...) group that contains table2.
SELECT
    t1.id   AS t1_id,
    t1.data,
    t2.id   AS t2_id,
    t2.type AS t2_type,
    t2.info,
    t3.type AS t3_type,
    t3.name
FROM
    table1 t1,
    OUTER(table2 t2, OUTER table3 t3)
WHERE
    t1.id = t2.id
    AND t2.type = t3.type;
-- Q3: ANSI form using two chained LEFT JOINs (table1 -> table2 -> table3).
SELECT
    t1.id   AS t1_id,
    t1.data,
    t2.id   AS t2_id,
    t2.type AS t2_type,
    t2.info,
    t3.type AS t3_type,
    t3.name
FROM table1 t1
LEFT JOIN table2 t2
    ON t1.id = t2.id
LEFT JOIN table3 t3
    ON t2.type = t3.type;
-- Q4: table2 and table3 are inner-joined inside derived table s, and s is then
-- left-joined to table1 on the table2 id (exposed as join_key).
SELECT
    t1.id      AS t1_id,
    t1.data,
    s.join_key AS t2_id,
    s.t2_type,
    s.info,
    s.t3_type,
    s.name
FROM table1 t1
LEFT JOIN (
    SELECT
        t2.id   AS join_key,
        t2.info,
        t3.name,
        t2.type AS t2_type,
        t3.type AS t3_type
    FROM table2 t2
    INNER JOIN table3 t3
        ON t2.type = t3.type
) s
    ON s.join_key = t1.id;
-- Q5: LEFT JOIN to table2 followed by a plain (inner) join to table3, without
-- a derived table. The inner join's condition on t2.type cannot be satisfied
-- by rows where table2 was NULL-extended, so those rows are dropped.
SELECT
    t1.id   AS t1_id,
    t1.data,
    t2.id   AS t2_id,
    t2.type AS t2_type,
    t2.info,
    t3.type AS t3_type,
    t3.name
FROM table1 t1
LEFT JOIN table2 t2
    ON t1.id = t2.id
INNER JOIN table3 t3
    ON t2.type = t3.type;
第一个查询是来自问题的查询,并指定了选定的列名。每个查询中的列名列表都是相同的。
第二个查询是一个变体,多用了一个 OUTER 关键字;它与第三个查询相对应,而第三个查询是 @leftjoin 答案的第一个版本。第四个查询是 @leftjoin 的“当前答案”(严格来说是第三个修订版)。第五个查询是第四个查询的变体——它产生不同的结果,所以二者并不等价。
这些是查询的输出。
-- Q1 (Informix): table2 and table3 together in a single OUTER(...).
SELECT
    t1.id   AS t1_id,
    t1.data,
    t2.id   AS t2_id,
    t2.type AS t2_type,
    t2.info,
    t3.type AS t3_type,
    t3.name
FROM
    table1 t1,
    OUTER(table2 t2, table3 t3)
WHERE
    t1.id = t2.id
    AND t2.type = t3.type;
| t1_id | data | t2_id | t2_type | info | t3_type | name |
|---|---|---|---|---|---|---|
| 100 | Table 1 - ID 100 | 100 | 300 | Table 2 - ID 100, Type 300 | 300 | Table 3 - Type 300 |
| 100 | Table 1 - ID 100 | 100 | 301 | Table 2 - ID 100, Type 301 | 301 | Table 3 - Type 301 |
| 100 | Table 1 - ID 100 | 100 | 302 | Table 2 - ID 100, Type 302 | 302 | Table 3 - Type 302 |
| 101 | Table 1 - ID 101 | 101 | 301 | Table 2 - ID 101, Type 301 | 301 | Table 3 - Type 301 |
| 101 | Table 1 - ID 101 | 101 | 302 | Table 2 - ID 101, Type 302 | 302 | Table 3 - Type 302 |
| 102 | Table 1 - ID 102 | | | | | |
| 103 | Table 1 - ID 103 | 103 | 300 | Table 2 - ID 103, Type 300 | 300 | Table 3 - Type 300 |
| 103 | Table 1 - ID 103 | 103 | 302 | Table 2 - ID 103, Type 302 | 302 | Table 3 - Type 302 |
| 103 | Table 1 - ID 103 | 103 | 303 | Table 2 - ID 103, Type 303 | 303 | Table 3 - Type 303 |
| 104 | Table 1 - ID 104 | | | | | |
-- Q2 (Informix): extra OUTER keyword on table3, nested inside OUTER(...).
SELECT
    t1.id   AS t1_id,
    t1.data,
    t2.id   AS t2_id,
    t2.type AS t2_type,
    t2.info,
    t3.type AS t3_type,
    t3.name
FROM
    table1 t1,
    OUTER(table2 t2, OUTER table3 t3)
WHERE
    t1.id = t2.id
    AND t2.type = t3.type;
| t1_id | data | t2_id | t2_type | info | t3_type | name |
|---|---|---|---|---|---|---|
| 100 | Table 1 - ID 100 | 100 | 300 | Table 2 - ID 100, Type 300 | 300 | Table 3 - Type 300 |
| 100 | Table 1 - ID 100 | 100 | 301 | Table 2 - ID 100, Type 301 | 301 | Table 3 - Type 301 |
| 100 | Table 1 - ID 100 | 100 | 302 | Table 2 - ID 100, Type 302 | 302 | Table 3 - Type 302 |
| 101 | Table 1 - ID 101 | 101 | 301 | Table 2 - ID 101, Type 301 | 301 | Table 3 - Type 301 |
| 101 | Table 1 - ID 101 | 101 | 302 | Table 2 - ID 101, Type 302 | 302 | Table 3 - Type 302 |
| 101 | Table 1 - ID 101 | 101 | 400 | Table 2 - ID 101, Type 400 | | |
| 102 | Table 1 - ID 102 | | | | | |
| 103 | Table 1 - ID 103 | 103 | 300 | Table 2 - ID 103, Type 300 | 300 | Table 3 - Type 300 |
| 103 | Table 1 - ID 103 | 103 | 302 | Table 2 - ID 103, Type 302 | 302 | Table 3 - Type 302 |
| 103 | Table 1 - ID 103 | 103 | 303 | Table 2 - ID 103, Type 303 | 303 | Table 3 - Type 303 |
| 104 | Table 1 - ID 104 | | | | | |
-- Q3 (ANSI): two chained LEFT JOINs.
SELECT
    t1.id   AS t1_id,
    t1.data,
    t2.id   AS t2_id,
    t2.type AS t2_type,
    t2.info,
    t3.type AS t3_type,
    t3.name
FROM table1 t1
LEFT JOIN table2 t2
    ON t1.id = t2.id
LEFT JOIN table3 t3
    ON t2.type = t3.type;
| t1_id | data | t2_id | t2_type | info | t3_type | name |
|---|---|---|---|---|---|---|
| 100 | Table 1 - ID 100 | 100 | 300 | Table 2 - ID 100, Type 300 | 300 | Table 3 - Type 300 |
| 100 | Table 1 - ID 100 | 100 | 301 | Table 2 - ID 100, Type 301 | 301 | Table 3 - Type 301 |
| 100 | Table 1 - ID 100 | 100 | 302 | Table 2 - ID 100, Type 302 | 302 | Table 3 - Type 302 |
| 101 | Table 1 - ID 101 | 101 | 301 | Table 2 - ID 101, Type 301 | 301 | Table 3 - Type 301 |
| 101 | Table 1 - ID 101 | 101 | 302 | Table 2 - ID 101, Type 302 | 302 | Table 3 - Type 302 |
| 101 | Table 1 - ID 101 | 101 | 400 | Table 2 - ID 101, Type 400 | | |
| 102 | Table 1 - ID 102 | | | | | |
| 103 | Table 1 - ID 103 | 103 | 300 | Table 2 - ID 103, Type 300 | 300 | Table 3 - Type 300 |
| 103 | Table 1 - ID 103 | 103 | 302 | Table 2 - ID 103, Type 302 | 302 | Table 3 - Type 302 |
| 103 | Table 1 - ID 103 | 103 | 303 | Table 2 - ID 103, Type 303 | 303 | Table 3 - Type 303 |
| 104 | Table 1 - ID 104 | | | | | |
-- Q4 (ANSI): LEFT JOIN from table1 to a derived table that inner-joins
-- table2 and table3.
SELECT
    t1.id      AS t1_id,
    t1.data,
    s.join_key AS t2_id,
    s.t2_type,
    s.info,
    s.t3_type,
    s.name
FROM table1 t1
LEFT JOIN (
    SELECT
        t2.id   AS join_key,
        t2.info,
        t3.name,
        t2.type AS t2_type,
        t3.type AS t3_type
    FROM table2 t2
    INNER JOIN table3 t3
        ON t2.type = t3.type
) s
    ON s.join_key = t1.id;
| t1_id | data | t2_id | t2_type | info | t3_type | name |
|---|---|---|---|---|---|---|
| 100 | Table 1 - ID 100 | 100 | 300 | Table 2 - ID 100, Type 300 | 300 | Table 3 - Type 300 |
| 100 | Table 1 - ID 100 | 100 | 301 | Table 2 - ID 100, Type 301 | 301 | Table 3 - Type 301 |
| 100 | Table 1 - ID 100 | 100 | 302 | Table 2 - ID 100, Type 302 | 302 | Table 3 - Type 302 |
| 101 | Table 1 - ID 101 | 101 | 301 | Table 2 - ID 101, Type 301 | 301 | Table 3 - Type 301 |
| 101 | Table 1 - ID 101 | 101 | 302 | Table 2 - ID 101, Type 302 | 302 | Table 3 - Type 302 |
| 102 | Table 1 - ID 102 | | | | | |
| 103 | Table 1 - ID 103 | 103 | 300 | Table 2 - ID 103, Type 300 | 300 | Table 3 - Type 300 |
| 103 | Table 1 - ID 103 | 103 | 302 | Table 2 - ID 103, Type 302 | 302 | Table 3 - Type 302 |
| 103 | Table 1 - ID 103 | 103 | 303 | Table 2 - ID 103, Type 303 | 303 | Table 3 - Type 303 |
| 104 | Table 1 - ID 104 | | | | | |
-- Q5 (ANSI): LEFT JOIN to table2, then a plain (inner) join to table3.
SELECT
    t1.id   AS t1_id,
    t1.data,
    t2.id   AS t2_id,
    t2.type AS t2_type,
    t2.info,
    t3.type AS t3_type,
    t3.name
FROM table1 t1
LEFT JOIN table2 t2
    ON t1.id = t2.id
INNER JOIN table3 t3
    ON t2.type = t3.type;
| t1_id | data | t2_id | t2_type | info | t3_type | name |
|---|---|---|---|---|---|---|
| 100 | Table 1 - ID 100 | 100 | 300 | Table 2 - ID 100, Type 300 | 300 | Table 3 - Type 300 |
| 103 | Table 1 - ID 103 | 103 | 300 | Table 2 - ID 103, Type 300 | 300 | Table 3 - Type 300 |
| 100 | Table 1 - ID 100 | 100 | 301 | Table 2 - ID 100, Type 301 | 301 | Table 3 - Type 301 |
| 101 | Table 1 - ID 101 | 101 | 301 | Table 2 - ID 101, Type 301 | 301 | Table 3 - Type 301 |
| 100 | Table 1 - ID 100 | 100 | 302 | Table 2 - ID 100, Type 302 | 302 | Table 3 - Type 302 |
| 101 | Table 1 - ID 101 | 101 | 302 | Table 2 - ID 101, Type 302 | 302 | Table 3 - Type 302 |
| 103 | Table 1 - ID 103 | 103 | 302 | Table 2 - ID 103, Type 302 | 302 | Table 3 - Type 302 |
| 103 | Table 1 - ID 103 | 103 | 303 | Table 2 - ID 103, Type 303 | 303 | Table 3 - Type 303 |
除非我遗漏了什么,否则 Q1 和 Q4 的输出是相同的(所以 @leftjoin 的答案是正确的);Q2 和 Q3 的输出彼此相同,但与 Q1 和 Q4 不同;Q5 的输出则与其他所有查询都不同。
【讨论】: