我希望通过一次查询进行迭代。是否可以使用内联 JS ...?
以下示例使用 BigQuery 标准 SQL。
它通过 JS UDF 在一次查询运行中完成全部迭代。
这需要将整个表的数据传递给 UDF,因此必然会受到 UDF 配额与限制(limits / limitations)的约束
我怀疑这个解决方案是否具有真正的实用价值,但作为一次练习,并借此进一步探索 BigQuery 的功能,它确实很有意思
另外请注意,为了简化 JS 代码并聚焦于问题的核心,这里做了一些假设:所有 id 都从 1 开始连续编号,中间没有间隔(如果你愿意,可以自行改进 :o))
#standardSQL
-- y(arr): computes start/finish day numbers for every task in one UDF call.
--
--   arr     JSON-encoded task rows (one string per row). Predecessors are
--           looked up by position, so the row with id N must be element N - 1
--           of the array (ids consecutive from 1, array ordered by id).
--   returns the same rows with start/finish filled in as integer day numbers,
--           where a root task (id = p1) starts on day 1.
--
-- Scheduling rule: a task starts the day after the latest finish of its
-- predecessors p1, p2, p3 (a NULL p2/p3 falls back to p1), and
-- finish = start + duration - 1.
-- Tasks that can never be resolved (dependency cycle, or a predecessor id
-- with no matching row) are returned with start/finish left NULL.
CREATE TEMPORARY FUNCTION y(arr ARRAY<STRING>)
RETURNS ARRAY<STRUCT<id INT64, name STRING, duration INT64, start INT64, finish INT64, p1 INT64, p2 INT64, p3 INT64>>
LANGUAGE js AS """
  // Decode the rows; a NULL input array is treated as "no tasks".
  var tasks = [];
  var rowCount = arr ? arr.length : 0;
  for (var r = 0; r < rowCount; r++) {
    tasks.push(JSON.parse(arr[r]));
  }

  // Predecessor ids are resolved by position: id N lives at index N - 1.
  function lookup(id) {
    return tasks[parseInt(id, 10) - 1];
  }

  // A predecessor that does not exist counts as "not scheduled yet".
  function isScheduled(task) {
    return task != null && task.start != null;
  }

  // Sweep the task list until a full pass schedules nothing new. Every
  // productive pass fixes at least one task, so the loop always terminates
  // and no fixed pass limit is needed.
  var progressed = true;
  while (progressed) {
    progressed = false;
    for (var i = 0; i < tasks.length; i++) {
      var task = tasks[i];
      if (task.start != null) { continue; }

      if (task.id == task.p1) {
        // Root task: it is its own predecessor, so it starts on day 1.
        task.start = 1;
      } else {
        var first = lookup(task.p1);
        var second = lookup(task.p2 || task.p1);
        var third = lookup(task.p3 || task.p1);
        if (!isScheduled(first) || !isScheduled(second) || !isScheduled(third)) {
          continue;  // still waiting on at least one predecessor
        }
        task.start = Math.max(first.finish, second.finish, third.finish) + 1;
      }
      task.finish = task.start + task.duration - 1;
      progressed = true;
    }
  }
  return tasks;
""";
-- Pack the whole table into one id-ordered array of JSON rows, run the
-- scheduler UDF once, then turn the returned day numbers into calendar dates
-- (day 1 = 2017-01-01).
WITH packed AS (
  -- single row holding every task as a JSON string
  SELECT ARRAY_AGG(TO_JSON_STRING(t) ORDER BY id) AS data
  FROM `yourTable` AS t
),
scheduled AS (
  -- one row per task, as returned by the UDF
  SELECT
    rec.id,
    rec.name,
    rec.duration,
    rec.start,
    rec.finish,
    rec.p1,
    rec.p2,
    rec.p3
  FROM packed
  CROSS JOIN UNNEST(y(packed.data)) AS rec
)
SELECT
  id,
  name,
  duration,
  DATE_ADD(DATE '2017-01-01', INTERVAL start - 1 DAY) AS start,
  DATE_ADD(DATE '2017-01-01', INTERVAL finish - 1 DAY) AS finish,
  p1,
  p2,
  p3
FROM scheduled
ORDER BY id
您可以使用下面的示例数据(取自您的问题)来测试和试验上面的查询
#standardSQL
-- y(arr): computes start/finish day numbers for every task in one UDF call.
--
--   arr     JSON-encoded task rows (one string per row). Predecessors are
--           looked up by position, so the row with id N must be element N - 1
--           of the array (ids consecutive from 1, array ordered by id).
--   returns the same rows with start/finish filled in as integer day numbers,
--           where a root task (id = p1) starts on day 1.
--
-- Scheduling rule: a task starts the day after the latest finish of its
-- predecessors p1, p2, p3 (a NULL p2/p3 falls back to p1), and
-- finish = start + duration - 1.
-- Tasks that can never be resolved (dependency cycle, or a predecessor id
-- with no matching row) are returned with start/finish left NULL.
CREATE TEMPORARY FUNCTION y(arr ARRAY<STRING>)
RETURNS ARRAY<STRUCT<id INT64, name STRING, duration INT64, start INT64, finish INT64, p1 INT64, p2 INT64, p3 INT64>>
LANGUAGE js AS """
  // Decode the rows; a NULL input array is treated as "no tasks".
  var tasks = [];
  var rowCount = arr ? arr.length : 0;
  for (var r = 0; r < rowCount; r++) {
    tasks.push(JSON.parse(arr[r]));
  }

  // Predecessor ids are resolved by position: id N lives at index N - 1.
  function lookup(id) {
    return tasks[parseInt(id, 10) - 1];
  }

  // A predecessor that does not exist counts as "not scheduled yet".
  function isScheduled(task) {
    return task != null && task.start != null;
  }

  // Sweep the task list until a full pass schedules nothing new. Every
  // productive pass fixes at least one task, so the loop always terminates
  // and no fixed pass limit is needed.
  var progressed = true;
  while (progressed) {
    progressed = false;
    for (var i = 0; i < tasks.length; i++) {
      var task = tasks[i];
      if (task.start != null) { continue; }

      if (task.id == task.p1) {
        // Root task: it is its own predecessor, so it starts on day 1.
        task.start = 1;
      } else {
        var first = lookup(task.p1);
        var second = lookup(task.p2 || task.p1);
        var third = lookup(task.p3 || task.p1);
        if (!isScheduled(first) || !isScheduled(second) || !isScheduled(third)) {
          continue;  // still waiting on at least one predecessor
        }
        task.start = Math.max(first.finish, second.finish, third.finish) + 1;
      }
      task.finish = task.start + task.duration - 1;
      progressed = true;
    }
  }
  return tasks;
""";
-- Demo run: `yourTable` is replaced by inline sample tasks. The first STRUCT
-- names the columns and pins the all-NULL ones to INT64; the remaining rows
-- are coerced to that shape.
WITH `yourTable` AS (
  SELECT id, name, duration, start, finish, p1, p2, p3
  FROM UNNEST([
    STRUCT(
      1 AS id, 'A' AS name, 14 AS duration,
      CAST(NULL AS INT64) AS start, CAST(NULL AS INT64) AS finish,
      1 AS p1, CAST(NULL AS INT64) AS p2, CAST(NULL AS INT64) AS p3
    ),
    STRUCT(2, 'B', 15, NULL, NULL, 1, NULL, NULL),
    STRUCT(3, 'C', 15, NULL, NULL, 1, NULL, NULL),
    STRUCT(4, 'D', 12, NULL, NULL, 1, NULL, NULL),
    STRUCT(5, 'E', 22, NULL, NULL, 2, 3, NULL),
    STRUCT(6, 'F', 14, NULL, NULL, 4, 1, NULL),
    STRUCT(7, 'G', 9, NULL, NULL, 5, 6, NULL)
  ])
),
packed AS (
  -- single row holding every task as a JSON string, ordered by id
  SELECT ARRAY_AGG(TO_JSON_STRING(t) ORDER BY id) AS data
  FROM `yourTable` AS t
),
scheduled AS (
  -- one row per task, as returned by the UDF
  SELECT
    rec.id,
    rec.name,
    rec.duration,
    rec.start,
    rec.finish,
    rec.p1,
    rec.p2,
    rec.p3
  FROM packed
  CROSS JOIN UNNEST(y(packed.data)) AS rec
)
-- Convert day numbers to calendar dates (day 1 = 2017-01-01).
SELECT
  id,
  name,
  duration,
  DATE_ADD(DATE '2017-01-01', INTERVAL start - 1 DAY) AS start,
  DATE_ADD(DATE '2017-01-01', INTERVAL finish - 1 DAY) AS finish,
  p1,
  p2,
  p3
FROM scheduled
ORDER BY id
结果是
id name duration start finish p1 p2 p3
1 A 14 2017-01-01 2017-01-14 1 null null
2 B 15 2017-01-15 2017-01-29 1 null null
3 C 15 2017-01-15 2017-01-29 1 null null
4 D 12 2017-01-15 2017-01-26 1 null null
5 E 22 2017-01-30 2017-02-20 2 3 null
6 F 14 2017-01-27 2017-02-09 4 1 null
7 G 9 2017-02-21 2017-03-01 5 6 null