使用下面的方法
-- For every date in the table, list the customers who bought during the three
-- months before the previous month (4 to 2 months back) but did not buy in the
-- previous month. Returns one row per distinct (date, customerid) pair.
select distinct
  t.date,
  lapsed_customerid as customerid
from (
  select
    date,
    -- ignore nulls: a null customerid identifies nobody and must not take part
    -- in the membership check below.
    array_agg(customerid ignore nulls) over (
      order by pos range between 4 preceding and 2 preceding
    ) as bought_in_3_months_before_prev,
    array_agg(customerid ignore nulls) over (
      order by pos range between 1 preceding and 1 preceding
    ) as bought_in_prev
  from (
    -- pos = whole months between 2000-01-01 and the row's date, so the range
    -- frames above count calendar months rather than rows.
    select
      date,
      customerid,
      date_diff(date, date '2000-01-01', month) as pos
    from `project.dataset.table`
  )
) as t
-- The arrays are computed per source row, so every row of a month carries the
-- same list; select distinct above collapses the resulting repeats.
cross join unnest(t.bought_in_3_months_before_prev) as lapsed_customerid
-- not exists (rather than not in) stays correct when the previous-month array
-- is null or empty.
where not exists (
  select 1
  from unnest(t.bought_in_prev) as prev_customerid
  where prev_customerid = lapsed_customerid
)
更新:
如果表中的数据量非常大,导致出现内存或资源不足的问题,请改用以下方法:
-- Set-based variant: every purchase is projected 2, 3 and 4 months forward
-- (the dates for which that purchase lies 4 to 2 months back); pairs where the
-- same customer also bought one month before the projected date are removed.
-- NOTE(review): pairs are matched on exact date values, so this presumably
-- expects a single date value per month (e.g. the first day) - confirm.
with lapsed_candidates as (
  select
    date_add(src.date, interval months_ahead month) as date,
    src.customerid
  from `project.dataset.table` as src
  cross join unnest([2, 3, 4]) as months_ahead
  except distinct
  select
    date_add(date, interval 1 month) as date,
    customerid
  from `project.dataset.table`
)
select
  date,
  customerid
from lapsed_candidates
-- Drop projected dates that fall after the latest date present in the table.
where date <= (select max(date) from `project.dataset.table`)
如果您不习惯使用 except distinct 运算符,也可以使用下面这个基于更常见、更传统的 union distinct 的版本。两个版本都不难理解,选用哪一个主要取决于个人偏好。
-- Union-based variant: every purchase is projected 2, 3 and 4 months forward
-- and tagged true; the same purchase projected 1 month forward is tagged
-- false. A (date, customerid) pair is kept only when none of its rows is
-- tagged false, i.e. the customer did not buy in the month before that date.
with tagged_months as (
  select
    date_add(src.date, interval months_ahead month) as date,
    src.customerid,
    true as in_lookback
  from `project.dataset.table` as src
  cross join unnest([2, 3, 4]) as months_ahead
  union distinct
  select
    date_add(date, interval 1 month) as date,
    customerid,
    false as in_lookback
  from `project.dataset.table`
)
select
  date,
  customerid
from tagged_months
-- Drop projected dates that fall after the latest date present in the table.
where date <= (select max(date) from `project.dataset.table`)
group by date, customerid
-- Every group is non-empty and the tag is never null, so "no false tag" is
-- the same condition as "all tags true".
having countif(not in_lookback) = 0