【发布时间】:2019-10-25 11:30:21
【问题描述】:
我需要您的帮助来编写满足以下要求的 Oracle SQL 查询。
来源数据如下:
CLAIM_NUMBER ADDRESS_1 ADDRESS_2
1001 220 CHIRAG ST OHIO
1003 220 ST CHIRAG OHIO
1004 19874 CURLEY AVE ORLANDO
1005 10874 CURLEY AVE ORLAND
1002 220 CHIRAG ST OHIO
1006 579 MLK Dr NASHVILLE
要求:如果某条记录的 ADDRESS_1||ADDRESS_2 与表中另一条记录的相似度超过 90%,则为这些相互匹配的记录生成相同的组 ID(随机的唯一 ID)。期望输出如下:
CLAIM_NUMBER ADDRESS_1 ADDRESS_2 group_id
1001 220 CHIRAG ST OHIO 134
1003 220 ST CHIRAG OHIO 134
1004 19874 CURLEY AVE ORLANDO 2985
1005 10874 CURLEY AVE ORLAND 2985
1002 220 CHIRAG ST OHIO 134
1006 579 MLK Dr NASHVILLE 3098
-- Assigns a group id to every row of src_table based on address similarity.
--
-- Approach (unchanged from the original attempt):
--   1. Build a normalized address key per row.
--   2. Walk the keys in sorted order; a row whose key is at most 90% similar
--      to the previous key starts a new group and becomes that group's anchor.
--   3. Join every source row to the anchors it is more than 90% similar to
--      and report the anchor's id (exposed as SIMILARITY_PCT, as before).
--
-- Fixes relative to the original:
--   * The two arguments of the first UTL_MATCH.JARO_WINKLER call were not
--     separated by a comma, so the statement did not parse.
--   * The first row in sort order has no predecessor (LAG yields NULL); it is
--     now made an anchor explicitly rather than depending on how
--     JARO_WINKLER treats a NULL argument.
--   * The normalized address expression is computed once instead of six times.
--
-- NOTE(review): a row that is >90% similar to more than one anchor is returned
-- once per matching anchor, and a row reached only through a chain of
-- near-matches may match no anchor (NULL group) - confirm whether that is
-- acceptable for the real data.
WITH ADDR AS
(
    -- Normalized key: trimmed, upper-cased, 'NA' substituted for a missing
    -- part, so the key itself is never NULL.
    SELECT
        ACCOUNT_ID,
        ACCOUNT_ADDRESS_STR_1,
        ACCOUNT_ADDRESS_STR_2,
        NVL(UPPER(TRIM(ACCOUNT_ADDRESS_STR_1)), 'NA')
            || NVL(UPPER(TRIM(ACCOUNT_ADDRESS_STR_2)), 'NA') AS ADDR_KEY
    FROM src_table
),
ADDR_PREV AS
(
    -- Pair each key with the key that precedes it in sorted order.
    SELECT
        ADDR_KEY,
        LAG(ADDR_KEY) OVER (ORDER BY ADDR_KEY) AS PREV_ADDR_KEY
    FROM ADDR
),
ACCT_GRP AS
(
    -- ROWNUM serves only as an arbitrary unique id for each anchor row;
    -- non-anchor rows get NULL.
    SELECT
        ADDR_KEY,
        CASE
            WHEN PREV_ADDR_KEY IS NULL
              OR UTL_MATCH.JARO_WINKLER(ADDR_KEY, PREV_ADDR_KEY) * 100 <= 90
            THEN ROWNUM
        END AS SIMILARITY_PCT
    FROM ADDR_PREV
),
ACCT_GRP_SUB AS
(
    -- Keep the anchors only.
    SELECT
        ADDR_KEY,
        SIMILARITY_PCT
    FROM ACCT_GRP
    WHERE SIMILARITY_PCT IS NOT NULL
)
SELECT
    stg.ACCOUNT_ID,
    stg.ACCOUNT_ADDRESS_STR_1,
    stg.ACCOUNT_ADDRESS_STR_2,
    grp.SIMILARITY_PCT
FROM ADDR stg
LEFT JOIN ACCT_GRP_SUB grp
    ON UTL_MATCH.JARO_WINKLER(stg.ADDR_KEY, grp.ADDR_KEY) * 100 > 90
ORDER BY stg.ADDR_KEY;
【问题讨论】:
标签: string oracle grouping matching