【发布时间】:2021-12-22 00:21:13
【问题描述】:
我不是 Postgres 方面的专家,但我正试图了解这种奇怪的行为,也许你们中的一些人可能会给我一些见解。
这些是涉及的表和索引
表格
CREATE TABLE swp_am_hcbe_pro.submissions
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.submissions_id_seq'::regclass),
application_id bigint NOT NULL,
transaction_names_id bigint NOT NULL,
"timestamp" timestamp without time zone NOT NULL,
submission_status character varying(32) COLLATE pg_catalog."default" NOT NULL,
submission_type character varying(16) COLLATE pg_catalog."default" NOT NULL,
exit_code character varying(32) COLLATE pg_catalog."default",
ignore_partner_status boolean NOT NULL DEFAULT false,
ignore_sell_partner_status boolean NOT NULL DEFAULT false,
ignore_exclusion_rules boolean NOT NULL DEFAULT false,
dpa_iban character varying(32) COLLATE pg_catalog."default",
dpa_bic character varying(32) COLLATE pg_catalog."default",
dpa_id bigint,
dpa_blz bigint,
dda_iban character varying(32) COLLATE pg_catalog."default",
dda_bic character varying(32) COLLATE pg_catalog."default",
dda_id bigint,
dda_blz bigint,
dda_sepa_mandate_ref character varying(128) COLLATE pg_catalog."default",
use_different_sepa_mandate character varying(34) COLLATE pg_catalog."default",
use_manual_limit_extension boolean NOT NULL DEFAULT false,
use_automatic_limit_extension boolean NOT NULL DEFAULT false,
json_payload text COLLATE pg_catalog."default" NOT NULL,
final_timestamp timestamp without time zone,
CONSTRAINT submissions_pkey PRIMARY KEY (id),
CONSTRAINT submission_app_id FOREIGN KEY (application_id)
REFERENCES swp_am_hcbe_pro.applications (id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE CASCADE,
CONSTRAINT submission_transaction_names_id FOREIGN KEY (transaction_names_id)
REFERENCES swp_am_hcbe_pro.transaction_names (id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE NO ACTION,
CONSTRAINT chk_submission_status CHECK (submission_status::text = ANY (ARRAY['ERROR'::character varying, 'DENIED'::character varying, 'PROCESSED'::character varying, 'REJECTED'::character varying, 'PROCESSING'::character varying, 'SCHEDULED'::character varying]::text[])),
CONSTRAINT submission_types CHECK (submission_type::text = ANY (ARRAY['AUTO'::character varying, 'MANUAL'::character varying]::text[]))
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
CREATE TABLE swp_am_hcbe_pro.applications
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.applications_id_seq'::regclass),
correlation_id character varying(64) COLLATE pg_catalog."default" NOT NULL,
incoming_timestamp timestamp without time zone NOT NULL,
source_input character varying(16) COLLATE pg_catalog."default" NOT NULL,
source_file_path character varying(255) COLLATE pg_catalog."default",
application_type character varying(127) COLLATE pg_catalog."default" NOT NULL,
loan_id bigint,
vin character varying(17) COLLATE pg_catalog."default",
cooperation_name character varying(255) COLLATE pg_catalog."default",
cooperation_id bigint,
submitter_name character varying(255) COLLATE pg_catalog."default",
submitter_id bigint,
dealer_name character varying(255) COLLATE pg_catalog."default",
dealer_id bigint,
dealer_ext_id character varying(25) COLLATE pg_catalog."default",
invoice_id character varying(25) COLLATE pg_catalog."default",
stock_id character varying(20) COLLATE pg_catalog."default",
payment_term character varying(20) COLLATE pg_catalog."default",
reg_document_id character varying(25) COLLATE pg_catalog."default",
invoice_amount numeric(20,4),
application_status character varying(64) COLLATE pg_catalog."default",
dealer_group_id bigint,
approver text COLLATE pg_catalog."default",
approve_timestamp timestamp without time zone,
payload text COLLATE pg_catalog."default" NOT NULL,
auto_resub_attempts integer NOT NULL DEFAULT 0,
row_number bigint,
email_sent boolean DEFAULT false,
modified_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP,
product_name text COLLATE pg_catalog."default",
priority smallint,
CONSTRAINT applications_pkey PRIMARY KEY (id),
CONSTRAINT chk_application_status CHECK (application_status::text = ANY (ARRAY['PROCESSED'::character varying, 'PROCESSING'::character varying, 'WAIT_NEXT_SUBMISSION'::character varying, 'WAIT_MANUAL_SUBMISSION'::character varying, 'WAIT_AUTOMATIC_SUBMISSION'::character varying, 'WAIT_IN_QUEUE'::character varying, 'SUBMISSION_NOT_FOUND'::character varying, 'WAIT_FOR_ASYNC_ACTIVATION'::character varying, 'WAIT_FOR_ASYNC_SHIPMENT'::character varying, 'WAIT_FOR_BOOKING_CONFIRMATION'::character varying, 'WAIT_FOR_ACTIVATION_CONFIRMATION'::character varying, 'REJECTED'::character varying, 'NOT_IN_QUEUE'::character varying, 'SCHEDULED'::character varying]::text[])),
CONSTRAINT chk_source CHECK (source_input::text = ANY (ARRAY['LM'::character varying, 'KOSYFA'::character varying, 'SWPII'::character varying, 'ADM'::character varying]::text[]))
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
CREATE TABLE swp_am_hcbe_pro.transaction_names
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.transaction_names_id_seq'::regclass),
name character varying(32) COLLATE pg_catalog."default" NOT NULL,
sub_name character varying(32) COLLATE pg_catalog."default",
CONSTRAINT transaction_names_pkey PRIMARY KEY (id)
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
索引
CREATE INDEX submissions_app_id_asc_timestamp_desc_idx
ON swp_am_hcbe_pro.submissions USING btree
(application_id, "timestamp" DESC)
TABLESPACE pg_default;
CREATE INDEX submissions_app_id_timestamp_trans_name_id_idx
ON swp_am_hcbe_pro.submissions USING btree
(application_id, "timestamp", transaction_names_id)
TABLESPACE pg_default;
CREATE INDEX submissions_timestamp_asc_app_id_asc_idx
ON swp_am_hcbe_pro.submissions USING btree
("timestamp", application_id)
TABLESPACE pg_default;
CREATE INDEX application_correlation_id_idx
ON swp_am_hcbe_pro.applications USING btree
(correlation_id COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX application_correlation_row_number_idx
ON swp_am_hcbe_pro.applications USING btree
(correlation_id COLLATE pg_catalog."default", row_number)
TABLESPACE pg_default;
CREATE INDEX applications_application_status_idx
ON swp_am_hcbe_pro.applications USING btree
(application_status COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX applications_invoice_idx
ON swp_am_hcbe_pro.applications USING btree
(invoice_id COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX applications_vin_idx
ON swp_am_hcbe_pro.applications USING btree
(vin COLLATE pg_catalog."default")
TABLESPACE pg_default;
我有以下看法
CREATE OR REPLACE VIEW swp_am_hcbe_pro.application_list_simple AS
WITH subm AS (
SELECT DISTINCT ON (s.application_id) s.application_id,
s."timestamp",
s.exit_code,
s.transaction_names_id
FROM swp_am_hcbe_pro.submissions s
ORDER BY s.application_id, s."timestamp" DESC
)
SELECT app.id,
app.correlation_id,
app.source_input,
app.source_file_path,
app.application_type,
app.loan_id,
app.vin,
app.cooperation_name,
app.cooperation_id,
app.submitter_name,
app.submitter_id,
app.dealer_id,
app.dealer_name,
app.dealer_ext_id,
app.invoice_id,
app.stock_id,
app.payment_term,
app.reg_document_id,
app.invoice_amount,
app.application_status,
app.incoming_timestamp,
app.dealer_group_id,
app.approver,
app.approve_timestamp,
subm.exit_code,
tn.name AS transaction_name,
tn.sub_name AS sub_transaction_name,
tn.id AS transaction_type_id,
subm."timestamp" AS last_submission_timestamp,
app.modified_date
FROM swp_am_hcbe_pro.applications app
LEFT JOIN subm ON app.id = subm.application_id
LEFT JOIN swp_am_hcbe_pro.transaction_names tn ON tn.id = subm.transaction_names_id;
如果我运行这条语句,经过的时间是:Execution time: 2481.333 ms
explain analyze
SELECT *, count(*) OVER () AS total FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-08' AND INCOMING_TIMESTAMP <= '2021-11-09'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
我得到了以下
"Limit (cost=461799.85..461800.10 rows=100 width=490) (actual time=2473.878..2474.618 rows=100 loops=1)"
" -> Sort (cost=461799.85..461803.13 rows=1311 width=490) (actual time=2473.877..2474.612 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 112kB"
" -> WindowAgg (cost=458791.38..461749.74 rows=1311 width=490) (actual time=2471.792..2473.247 rows=1620 loops=1)"
" -> Hash Left Join (cost=458791.38..461720.25 rows=1311 width=482) (actual time=2456.132..2470.895 rows=1620 loops=1)"
" Hash Cond: (subm.transaction_names_id = tn.id)"
" CTE subm"
" -> Unique (cost=0.43..333656.64 rows=129297 width=31) (actual time=0.036..1846.992 rows=645062 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329433.26 rows=1689349 width=31) (actual time=0.033..1621.049 rows=1699582 loops=1)"
" -> Hash Right Join (cost=125133.09..128058.44 rows=1311 width=459) (actual time=2456.083..2470.337 rows=1620 loops=1)"
" Hash Cond: (subm.application_id = app.id)"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.038..2135.256 rows=645062 loops=1)"
" -> Hash (cost=125116.71..125116.71 rows=1311 width=361) (actual time=237.582..238.310 rows=1620 loops=1)"
" Buckets: 2048 Batches: 1 Memory Usage: 483kB"
" -> Gather (cost=1000.00..125116.71 rows=1311 width=361) (actual time=11.959..236.468 rows=1620 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..123985.61 rows=546 width=361) (actual time=2.880..97.484 rows=540 loops=3)"
" Filter: ((incoming_timestamp >= '2021-11-08 00:00:00'::timestamp without time zone) AND (incoming_timestamp <= '2021-11-09 00:00:00'::timestamp without time zone))"
" Rows Removed by Filter: 214530"
" -> Hash (cost=1.29..1.29 rows=29 width=31) (actual time=0.033..0.033 rows=29 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 10kB"
" -> Seq Scan on transaction_names tn (cost=0.00..1.29 rows=29 width=31) (actual time=0.011..0.015 rows=29 loops=1)"
"Planning time: 0.587 ms"
"Execution time: 2481.333 ms"
如果我运行这个只更改日期过滤器,它需要 Execution time: 365817.271 ms
explain analyze
SELECT *, count(*) OVER () AS total FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-09' AND INCOMING_TIMESTAMP <= '2021-11-10'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
"Limit (cost=462844.68..462844.69 rows=1 width=490) (actual time=365809.554..365810.419 rows=100 loops=1)"
" -> Sort (cost=462844.68..462844.69 rows=1 width=490) (actual time=365809.553..365810.411 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 125kB"
" -> WindowAgg (cost=334656.77..462844.67 rows=1 width=490) (actual time=365806.595..365808.483 rows=2140 loops=1)"
" -> Nested Loop Left Join (cost=334656.77..462844.65 rows=1 width=482) (actual time=2094.856..365793.839 rows=2140 loops=1)"
" CTE subm"
" -> Unique (cost=0.43..333656.64 rows=129297 width=31) (actual time=0.036..1771.818 rows=645068 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329433.26 rows=1689349 width=31) (actual time=0.034..1563.614 rows=1699595 loops=1)"
" -> Nested Loop Left Join (cost=1000.00..129187.86 rows=1 width=459) (actual time=2094.836..365762.361 rows=2140 loops=1)"
" Join Filter: (app.id = subm.application_id)"
" Rows Removed by Join Filter: 1380443382"
" -> Gather (cost=1000.00..124985.71 rows=1 width=361) (actual time=8.475..33.996 rows=2140 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..123985.61 rows=1 width=361) (actual time=1.809..103.597 rows=713 loops=3)"
" Filter: ((incoming_timestamp >= '2021-11-09 00:00:00'::timestamp without time zone) AND (incoming_timestamp <= '2021-11-10 00:00:00'::timestamp without time zone))"
" Rows Removed by Filter: 214359"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.030..125.740 rows=645068 loops=2140)"
" -> Index Scan using transaction_names_pkey on transaction_names tn (cost=0.14..0.16 rows=1 width=31) (actual time=0.009..0.009 rows=1 loops=2140)"
" Index Cond: (id = subm.transaction_names_id)"
"Planning time: 0.414 ms"
"Execution time: 365817.271 ms"
我真的不明白为什么会这样。我还尝试使用超过一个日期(例如一周、一个月)的过滤器运行查询,并且所有这些都可以正常工作。
我清理受影响的表,即使没有那么多行。我还能检查什么?
如果您需要更多信息,请随时问我
更新
如果我将查询更改为此,在字符串上使用to_timestamp,那么它可以工作。但是为什么它在所有其他情况下都有效,而不是在这种情况下呢?为什么总是在当前日期发生?
explain analyze
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= to_timestamp('2021-11-09 00:00:00','YYYY-MM-DD HH24:MI:SS')
AND INCOMING_TIMESTAMP <= to_timestamp('2021-11-10 00:00:00','YYYY-MM-DD HH24:MI:SS')
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0 ;
我得到以下
"Limit (cost=463151.72..463151.97 rows=100 width=481) (actual time=2743.036..2743.923 rows=100 loops=1)"
" -> Sort (cost=463151.72..463153.01 rows=517 width=481) (actual time=2743.035..2743.918 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 121kB"
" -> Hash Left Join (cost=460200.05..463126.79 rows=517 width=481) (actual time=2730.684..2741.744 rows=2382 loops=1)"
" Hash Cond: (subm.transaction_names_id = tn.id)"
" CTE subm"
" -> Unique (cost=0.43..333658.84 rows=129297 width=31) (actual time=0.020..1669.678 rows=645311 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329435.46 rows=1689349 width=31) (actual time=0.019..1476.827 rows=1700028 loops=1)"
" -> Hash Right Join (cost=126539.56..129464.91 rows=517 width=458) (actual time=2730.642..2740.999 rows=2382 loops=1)"
" Hash Cond: (subm.application_id = app.id)"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.023..1924.458 rows=645311 loops=1)"
" -> Hash (cost=126533.10..126533.10 rows=517 width=360) (actual time=736.655..737.534 rows=2382 loops=1)"
" Buckets: 4096 (originally 1024) Batches: 1 (originally 1) Memory Usage: 864kB"
" -> Gather (cost=1000.00..126533.10 rows=517 width=360) (actual time=18.882..734.265 rows=2382 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..125481.40 rows=215 width=360) (actual time=15.908..610.513 rows=794 loops=3)"
" Filter: ((incoming_timestamp >= to_timestamp('2021-11-09 00:00:00'::text, 'YYYY-MM-DD HH24:MI:SS'::text)) AND (incoming_timestamp <= to_timestamp('2021-11-10 00:00:00'::text, 'YYYY-MM-DD HH24:MI:SS'::text)))"
" Rows Removed by Filter: 214359"
" -> Hash (cost=1.29..1.29 rows=29 width=31) (actual time=0.026..0.026 rows=29 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 10kB"
" -> Seq Scan on transaction_names tn (cost=0.00..1.29 rows=29 width=31) (actual time=0.012..0.018 rows=29 loops=1)"
"Planning time: 0.370 ms"
"Execution time: 2751.279 ms"
所以,问题仍然存在
为什么这个查询需要 360 秒?
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-09' AND INCOMING_TIMESTAMP <= '2021-11-10'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
但是这个需要 3 秒
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= to_timestamp('2021-11-09 00:00:00','YYYY-MM-DD HH24:MI:SS')
AND INCOMING_TIMESTAMP <= to_timestamp('2021-11-10 00:00:00','YYYY-MM-DD HH24:MI:SS')
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0 ;
在任何其他情况下,无论我是否使用to_timestamp,它都有效。只是备注,我在上次更新中删除了count(*) over() 以表明它不相关,所以问题仍然存在。
感谢您的支持
【问题讨论】:
-
为了帮助您解决query-optimization 的问题,我们了解您的表和索引定义。请read this,然后edit您的问题。
-
@O.Jones,我很乐意添加表和索引定义,但在这种情况下,当查询在除 one 之外的任何情况下工作时,关系是什么?如果您查看这些计划,它们几乎是相同的。因此,索引或表在我对多个日期使用过滤器的任何情况下都有效。我更新了问题,显示相同的查询与
to_timestamp一起用于当前日期的过滤器 -
Rows Removed by Join Filter: 1380443382"是不同的。 [加上只使用了一个索引;可能是因为只找到了一个可用的索引?] -
@wildplasser,我明白了,但不管范围日期是什么,它仍然应该发生,不是吗?因此,如果我使用
to_timestamp它适用于这种情况以及任何其他情况。如果我不使用它,它适用于除当前日期以外的所有场景 -
Join Filter: (app.id = subm.application_id)"
标签: sql postgresql query-optimization postgresql-10