如果我理解正确,您有两组数据要删除:
- 在最近一次更改之前对客户数据(姓名、地址...)进行更改的所有行;
- 在最近一次更改之后、仅 eff_dt 发生变化而其他所有内容都相同的所有行。
如果是这种情况,您可以使用两个分析函数来找出客户数据最近一次变更的最早日期:
-- Sample fixture: a per-customer history of (name, address) snapshots keyed by effective date.
-- Dates use ANSI date literals so the script does not depend on the session's NLS_DATE_LANGUAGE,
-- and every insert names its target columns so it survives later changes to the table shape.
create table test_tab (
    id      number,
    eff_dt  date,
    name    varchar2(20),
    address varchar2(50)
);

-- id 1: one change, followed by rows that repeat the changed values
insert into test_tab (id, eff_dt, name, address) values (1, date '2018-07-01', 'Name 1', 'Address 1');
insert into test_tab (id, eff_dt, name, address) values (1, date '2018-07-15', 'Name 1', 'Address 1');
insert into test_tab (id, eff_dt, name, address) values (1, date '2018-08-01', 'Name 1 changed', 'Address 1 changed');
insert into test_tab (id, eff_dt, name, address) values (1, date '2018-08-05', 'Name 1 changed', 'Address 1 changed');
insert into test_tab (id, eff_dt, name, address) values (1, date '2018-08-10', 'Name 1 changed', 'Address 1 changed');

-- id 2: no change to name/address at all, only eff_dt differs
insert into test_tab (id, eff_dt, name, address) values (2, date '2018-07-12', 'Name 2', 'Address 2');
insert into test_tab (id, eff_dt, name, address) values (2, date '2018-07-18', 'Name 2', 'Address 2');

-- id 3: two successive changes, the last one repeated once
insert into test_tab (id, eff_dt, name, address) values (3, date '2018-07-15', 'Name 3', 'Address 3');
insert into test_tab (id, eff_dt, name, address) values (3, date '2018-07-18', 'Name 3 changed', 'Address 3 changed');
insert into test_tab (id, eff_dt, name, address) values (3, date '2018-07-25', 'Name 3 changed again', 'Address 3 changed again');
insert into test_tab (id, eff_dt, name, address) values (3, date '2018-08-12', 'Name 3 changed again', 'Address 3 changed again');
-- For each id, return the single row on which the most recent name/address version first took effect.
-- To inspect the analytic results, drop "where rn = 1" and uncomment the extra columns hinted below.
-- NOTE: versions are identified by (id, name, address); if a customer's data ever reverts to an
-- earlier value, those rows fall into the same partition and share one min_eff_dt.
select id, eff_dt, name, address -- , rn, min_eff_dt
from (select id, eff_dt, name, address, -- min_eff_dt,
             -- highest min_eff_dt = the latest version; the eff_dt tiebreaker makes rn = 1
             -- deterministically the earliest row of that version
             row_number() over (partition by id order by min_eff_dt desc, eff_dt) as rn
      from (select id, eff_dt, name, address,
                   -- earliest effective date of each distinct (name, address) version
                   min(eff_dt) over (partition by id, name, address) as min_eff_dt
            from test_tab))
where rn = 1;
您可以去掉 where rn = 1,并在第二层 select 语句中加入 min_eff_dt、在最外层 select 语句中加入 rn 和 min_eff_dt 来测试该脚本,这样就能看到分析函数的结果。
您也可以像威廉的回答中那样使用 delete:
-- Delete every row except, per id, the one on which the most recent name/address version
-- first took effect.
-- The base-table rowid is projected under an alias (rid) from the innermost query, because
-- ROWID cannot be selected from an inline view that contains analytic functions.
delete from test_tab
where rowid in
      (select rid
       from (select rid,
                    -- highest min_eff_dt = the latest version; the eff_dt tiebreaker pins rn = 1
                    -- to the earliest row of that version, so the surviving row is deterministic
                    row_number() over (partition by id order by min_eff_dt desc, eff_dt) as rn
             from (select rowid as rid, id, eff_dt,
                          -- earliest effective date of each distinct (name, address) version
                          min(eff_dt) over (partition by id, name, address) as min_eff_dt
                   from test_tab))
       where rn > 1);