【发布时间】:2019-08-23 10:50:48
【问题描述】:
我正在使用 django 的 ORM 来运行一个进行一些聚合的查询,但我需要先从其他表中过滤参数。 问题是我的一张表非常大,有 400M 行,并且连接需要永远。 我的数据库在运行时负载很重,并且运行了几个小时。 我必须通过执行一些原始 sql 而不是使用 ORM 来优化此查询,但我需要有关如何解决巨大连接问题的帮助。
这是我的查询。大表是keyword_tracker_keywordranking,其余的都差不多。
SELECT DISTINCT
"kr"."offer_id",
"kr"."keyword_id",
"kr"."page_number",
COUNT(*) AS "tally",
AVG("kr"."ranking") AS "avg_rank",
"o"."brand" AS "brand",
("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date AS "date",
"kr"."search_result_kind" AS "kind"
FROM "keyword_tracker_keywordranking" "kr"
INNER JOIN "keyword_tracker_keyword" "k" ON ("kr"."keyword_id" = "k"."id")
INNER JOIN "keyword_tracker_keywordsettings" "ks" ON ("k"."id" = "ks"."keyword_id")
INNER JOIN "account_account" ON ("ks"."account_id" = "account_account"."id")
INNER JOIN "offers_boooffer" "o" ON ("kr"."offer_id" = "o"."id")
WHERE ("kr"."datetime" >= '2019-08-21' AND "kr"."datetime" < '2019-08-22' AND "account_account"."client" = 'client')
GROUP BY
"kr"."offer_id",
"kr"."keyword_id",
"kr"."search_result_kind",
"kr"."page_number",
"o"."brand",
("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date
这里是解释。我们可以看到它永远在做一个嵌套循环:
[
{
"Plan": {
"Node Type": "Unique",
"Parallel Aware": false,
"Startup Cost": 4983963.55,
"Total Cost": 4983963.58,
"Plan Rows": 1,
"Plan Width": 72,
"Actual Startup Time": 411341.511,
"Actual Total Time": 411341.511,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"(count(*))",
"(avg(kr.ranking))",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind"
],
"Plans": [
{
"Node Type": "Sort",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 4983963.55,
"Total Cost": 4983963.56,
"Plan Rows": 1,
"Plan Width": 72,
"Actual Startup Time": 411341.51,
"Actual Total Time": 411341.51,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"(count(*))",
"(avg(kr.ranking))",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind"
],
"Sort Key": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"(count(*))",
"(avg(kr.ranking))",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind"
],
"Sort Method": "quicksort",
"Sort Space Used": 25,
"Sort Space Type": "Memory",
"Plans": [
{
"Node Type": "Aggregate",
"Strategy": "Sorted",
"Partial Mode": "Simple",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 4983963.5,
"Total Cost": 4983963.54,
"Plan Rows": 1,
"Plan Width": 72,
"Actual Startup Time": 411341.463,
"Actual Total Time": 411341.463,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"count(*)",
"avg(kr.ranking)",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind"
],
"Group Key": [
"kr.offer_id",
"kr.keyword_id",
"kr.search_result_kind",
"kr.page_number",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)"
],
"Plans": [
{
"Node Type": "Sort",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 4983963.5,
"Total Cost": 4983963.51,
"Plan Rows": 1,
"Plan Width": 36,
"Actual Startup Time": 411341.461,
"Actual Total Time": 411341.461,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind",
"kr.ranking"
],
"Sort Key": [
"kr.offer_id",
"kr.keyword_id",
"kr.search_result_kind",
"kr.page_number",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)"
],
"Sort Method": "quicksort",
"Sort Space Used": 25,
"Sort Space Type": "Memory",
"Plans": [
{
"Node Type": "Nested Loop",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 1.85,
"Total Cost": 4983963.49,
"Plan Rows": 1,
"Plan Width": 36,
"Actual Startup Time": 411341.455,
"Actual Total Time": 411341.455,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"o.brand",
"(timezone('Europe/Paris'::text, kr.datetime))::date",
"kr.search_result_kind",
"kr.ranking"
],
"Inner Unique": true,
"Plans": [
{
"Node Type": "Nested Loop",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 1.42,
"Total Cost": 4983960.83,
"Plan Rows": 1,
"Plan Width": 31,
"Actual Startup Time": 411341.455,
"Actual Total Time": 411341.455,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"kr.ranking",
"kr.datetime",
"kr.search_result_kind"
],
"Inner Unique": true,
"Join Filter": "(ks.account_id = account_account.id)",
"Rows Removed by Join Filter": 0,
"Plans": [
{
"Node Type": "Nested Loop",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 1.42,
"Total Cost": 4983958.91,
"Plan Rows": 1,
"Plan Width": 35,
"Actual Startup Time": 286863.945,
"Actual Total Time": 382461.602,
"Actual Rows": 4879993,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"kr.ranking",
"kr.datetime",
"kr.search_result_kind",
"ks.account_id"
],
"Inner Unique": false,
"Join Filter": "(kr.keyword_id = ks.keyword_id)",
"Rows Removed by Join Filter": 0,
"Plans": [
{
"Node Type": "Nested Loop",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 1,
"Total Cost": 4983957.47,
"Plan Rows": 1,
"Plan Width": 35,
"Actual Startup Time": 286841.365,
"Actual Total Time": 373503.135,
"Actual Rows": 4272224,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"kr.ranking",
"kr.datetime",
"kr.search_result_kind",
"k.id"
],
"Inner Unique": true,
"Plans": [
{
"Node Type": "Index Scan",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "keyword_tracker_keywordranking_id_datetime_idx",
"Relation Name": "keyword_tracker_keywordranking",
"Schema": "public",
"Alias": "kr",
"Startup Cost": 0.57,
"Total Cost": 4983954.82,
"Plan Rows": 1,
"Plan Width": 31,
"Actual Startup Time": 286821.372,
"Actual Total Time": 325753.923,
"Actual Rows": 4272224,
"Actual Loops": 1,
"Output": [
"kr.id",
"kr.page_url",
"kr.marketplace",
"kr.page_number",
"kr.ranking",
"kr.product_title",
"kr.brand",
"kr.image_url",
"kr.nb_comments",
"kr.prime",
"kr.rating",
"kr.keyword_id",
"kr.offer_id",
"kr.asin",
"kr.nb_results",
"kr.parent_offer_id",
"kr.amazon_choice_id",
"kr.bestseller_id",
"kr.price",
"kr.is_add_on",
"kr.datetime",
"kr.search_result_kind",
"kr.created",
"kr.modified"
],
"Index Cond": "((kr.datetime >= '2019-08-21 00:00:00+00'::timestamp with time zone) AND (kr.datetime < '2019-08-22 00:00:00+00'::timestamp with time zone))",
"Rows Removed by Index Recheck": 0
},
{
"Node Type": "Index Only Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "keyword_tracker_keyword_pkey",
"Relation Name": "keyword_tracker_keyword",
"Schema": "public",
"Alias": "k",
"Startup Cost": 0.43,
"Total Cost": 2.65,
"Plan Rows": 1,
"Plan Width": 4,
"Actual Startup Time": 0.011,
"Actual Total Time": 0.011,
"Actual Rows": 1,
"Actual Loops": 4272224,
"Output": [
"k.id"
],
"Index Cond": "(k.id = kr.keyword_id)",
"Rows Removed by Index Recheck": 0,
"Heap Fetches": 27999076
}
]
},
{
"Node Type": "Index Only Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "keyword_tracker_keywords_keyword_id_account_id_2d8e95e8_uniq",
"Relation Name": "keyword_tracker_keywordsettings",
"Schema": "public",
"Alias": "ks",
"Startup Cost": 0.42,
"Total Cost": 1.43,
"Plan Rows": 1,
"Plan Width": 8,
"Actual Startup Time": 0.002,
"Actual Total Time": 0.002,
"Actual Rows": 1,
"Actual Loops": 4272224,
"Output": [
"ks.keyword_id",
"ks.account_id"
],
"Index Cond": "(ks.keyword_id = k.id)",
"Rows Removed by Index Recheck": 0,
"Heap Fetches": 742872
}
]
},
{
"Node Type": "Seq Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Relation Name": "account_account",
"Schema": "public",
"Alias": "account_account",
"Startup Cost": 0,
"Total Cost": 1.91,
"Plan Rows": 1,
"Plan Width": 4,
"Actual Startup Time": 0.005,
"Actual Total Time": 0.005,
"Actual Rows": 0,
"Actual Loops": 4879993,
"Output": [
"account_account.id",
"account_account.name",
"account_account.client",
"account_account.created_on",
"account_account.scraping_frequency",
"account_account.created",
"account_account.modified"
],
"Filter": "((account_account.client)::text = 'client'::text)",
"Rows Removed by Filter": 75
}
]
},
{
"Node Type": "Index Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "offers_boooffer_pkey",
"Relation Name": "offers_boooffer",
"Schema": "public",
"Alias": "o",
"Startup Cost": 0.44,
"Total Cost": 2.66,
"Plan Rows": 1,
"Plan Width": 13,
"Actual Startup Time": 0,
"Actual Total Time": 0,
"Actual Rows": 0,
"Actual Loops": 0,
"Output": [
"o.brand",
"o.id"
],
"Index Cond": "(o.id = kr.offer_id)",
"Rows Removed by Index Recheck": 0
}
]
}
]
}
]
}
]
}
]
},
"Triggers": []
}
]
【问题讨论】:
-
请向我们展示使用
explain (analyze, buffers, FORMAT TEXT)生成的执行计划,JSON 格式不适用于人类 -
对于大数据,您可以使用视图。加入您需要加入的表并从该输出中创建一个视图,然后您可以在需要时使用该视图。
-
请在您的问题中添加相关的 DDL(+一些关于基数的信息)。
标签: sql postgresql query-optimization query-performance