以下是 BigQuery 标准 SQL 的写法:
#standardSQL
-- For every `data` string: extract each 7-digit run, keep the runs that match
-- an account in anotherTable, and return the matched numbers and their names
-- as '|'-separated lists ordered by the run's offset in the extracted array.
SELECT
  STRING_AGG(acct_nbr, '|' ORDER BY nbr_pos) AS account_nbr,
  STRING_AGG(accounts.name, '|' ORDER BY nbr_pos) AS account_name,
  src.data
FROM `project.dataset.yourTable` AS src
CROSS JOIN UNNEST(
  REGEXP_EXTRACT_ALL(
    -- Each run of non-digit characters is collapsed into one comma first,
    -- so the digit runs end up comma-separated before extraction.
    REGEXP_REPLACE(src.data, r'[^\d]+', ','),
    r'[0-9]{7}'
  )
) AS acct_nbr WITH OFFSET AS nbr_pos
INNER JOIN `project.dataset.anotherTable` AS accounts
  -- account is cast to STRING because the extracted values are strings.
  ON CAST(accounts.account AS STRING) = acct_nbr
GROUP BY src.data
您可以使用问题中的示例数据来测试、试用上面的查询:
#standardSQL
-- Self-contained demo: the two CTEs below stand in for the real tables.
-- For the sample row the expected output is
--   account_nbr  = '2435034|1483528|1010203'
--   account_name = 'D1|D2|D3'
WITH `project.dataset.yourTable` AS (
  SELECT
    ";2435034;1;5.98;;eVar36=bopis|ev2=2605,;1483528;1;17.97;;ev6=bopis|evar52=2605,;1010203;1;7.98;;ev6=bopis|ev2=2605" AS data
), `project.dataset.anotherTable` AS (
  SELECT 2435034 AS account, 'D1' AS name
  UNION ALL
  SELECT 1483528, 'D2'
  UNION ALL
  SELECT 1010203, 'D3'
)
SELECT
  STRING_AGG(acct_nbr, '|' ORDER BY nbr_pos) AS account_nbr,
  STRING_AGG(accounts.name, '|' ORDER BY nbr_pos) AS account_name,
  src.data
FROM `project.dataset.yourTable` AS src
CROSS JOIN UNNEST(
  REGEXP_EXTRACT_ALL(
    -- Each run of non-digit characters is collapsed into one comma first,
    -- so the digit runs end up comma-separated before extraction.
    REGEXP_REPLACE(src.data, r'[^\d]+', ','),
    r'[0-9]{7}'
  )
) AS acct_nbr WITH OFFSET AS nbr_pos
INNER JOIN `project.dataset.anotherTable` AS accounts
  -- account is an integer in the CTE; the extracted values are strings.
  ON CAST(accounts.account AS STRING) = acct_nbr
GROUP BY src.data
更新(针对评论中的新问题):"如果 t.data 为空,该记录会被过滤掉。有没有办法在 t.data 为空时也保留该记录?在我的表中,有些记录的 t.data 没有值。"
#standardSQL
-- Variant that keeps source rows whose `data` is NULL. The lookup is done in
-- scalar subqueries in the SELECT list instead of a join in FROM, so a source
-- row with nothing to match is still returned, with NULL in a_nbr / a_name.
-- In the sample data, 2435031 (id 1) has no counterpart in the lookup CTE, and
-- id 2 has NULL data.
WITH `project.dataset.yourTable` AS (
  SELECT
    1 AS id,
    ";2435031;1;5.98;;eVar36=bopis|ev2=2605,;1483528;1;17.97;;ev6=bopis|evar52=2605,;1010203;1;7.98;;ev6=bopis|ev2=2605" AS data
  UNION ALL
  SELECT
    2,
    NULL
), `project.dataset.anotherTable` AS (
  SELECT 2435034 AS account, 'D1' AS name
  UNION ALL
  SELECT 1483528, 'D2'
  UNION ALL
  SELECT 1010203, 'D3'
)
SELECT
  src.id,
  -- '|'-separated list of the matched 7-digit account numbers.
  (
    SELECT STRING_AGG(acct_nbr, '|' ORDER BY nbr_pos)
    FROM UNNEST(
      REGEXP_EXTRACT_ALL(
        REGEXP_REPLACE(src.data, r'[^\d]+', ','),
        r'[0-9]{7}'
      )
    ) AS acct_nbr WITH OFFSET AS nbr_pos
    INNER JOIN `project.dataset.anotherTable` AS accounts
      ON CAST(accounts.account AS STRING) = acct_nbr
  ) AS a_nbr,
  -- '|'-separated list of the names for those same matches, in the same order.
  (
    SELECT STRING_AGG(accounts.name, '|' ORDER BY nbr_pos)
    FROM UNNEST(
      REGEXP_EXTRACT_ALL(
        REGEXP_REPLACE(src.data, r'[^\d]+', ','),
        r'[0-9]{7}'
      )
    ) AS acct_nbr WITH OFFSET AS nbr_pos
    INNER JOIN `project.dataset.anotherTable` AS accounts
      ON CAST(accounts.account AS STRING) = acct_nbr
  ) AS a_name,
  src.data
FROM `project.dataset.yourTable` AS src
GROUP BY src.id, src.data