在具有大量连接的大型 table 上优化 SELECT

Optimizing SELECT on large table with lots of join

我正在使用 django 的 ORM 来 运行 一个进行一些聚合的查询,但我需要先过滤来自其他 table 的参数。 问题是我的 table 之一非常大,有 400M 行,并且连接一直在进行。 我的数据库在 运行ning 和 运行s 几个小时内负载很重。 我必须通过做一些原始的 sql 而不是使用 ORM 来优化此查询,但我希望获得有关如何解决巨大连接问题的帮助。

这是我的查询。大的table是keyword_tracker_keywordranking,其余的都差不多

SELECT DISTINCT
    "kr"."offer_id",
    "kr"."keyword_id",
    "kr"."page_number",
    COUNT(*) AS "tally",
    AVG("kr"."ranking") AS "avg_rank",
    "o"."brand" AS "brand",
    ("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date AS "date",
    "kr"."search_result_kind" AS "kind"
FROM "keyword_tracker_keywordranking" "kr"
INNER JOIN "keyword_tracker_keyword" "k" ON ("kr"."keyword_id" = "k"."id")
INNER JOIN "keyword_tracker_keywordsettings" "ks" ON ("k"."id" = "ks"."keyword_id")
INNER JOIN "account_account" ON ("ks"."account_id" = "account_account"."id")
INNER JOIN "offers_boooffer" "o" ON ("kr"."offer_id" = "o"."id")
WHERE ("kr"."datetime" >= '2019-08-21' AND "kr"."datetime" < '2019-08-22' AND "account_account"."client" = 'client')
GROUP BY
    "kr"."offer_id",
    "kr"."keyword_id",
    "kr"."search_result_kind",
    "kr"."page_number",
    "o"."brand",
    ("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date

这是解释。我们可以看到它永远在做一个嵌套循环:

[
  {
    "Plan": {
      "Node Type": "Unique",
      "Parallel Aware": false,
      "Startup Cost": 4983963.55,
      "Total Cost": 4983963.58,
      "Plan Rows": 1,
      "Plan Width": 72,
      "Actual Startup Time": 411341.511,
      "Actual Total Time": 411341.511,
      "Actual Rows": 0,
      "Actual Loops": 1,
      "Output": [
        "kr.offer_id",
        "kr.keyword_id",
        "kr.page_number",
        "(count(*))",
        "(avg(kr.ranking))",
        "o.brand",
        "((timezone('Europe/Paris'::text, kr.datetime))::date)",
        "kr.search_result_kind"
      ],
      "Plans": [
        {
          "Node Type": "Sort",
          "Parent Relationship": "Outer",
          "Parallel Aware": false,
          "Startup Cost": 4983963.55,
          "Total Cost": 4983963.56,
          "Plan Rows": 1,
          "Plan Width": 72,
          "Actual Startup Time": 411341.51,
          "Actual Total Time": 411341.51,
          "Actual Rows": 0,
          "Actual Loops": 1,
          "Output": [
            "kr.offer_id",
            "kr.keyword_id",
            "kr.page_number",
            "(count(*))",
            "(avg(kr.ranking))",
            "o.brand",
            "((timezone('Europe/Paris'::text, kr.datetime))::date)",
            "kr.search_result_kind"
          ],
          "Sort Key": [
            "kr.offer_id",
            "kr.keyword_id",
            "kr.page_number",
            "(count(*))",
            "(avg(kr.ranking))",
            "o.brand",
            "((timezone('Europe/Paris'::text, kr.datetime))::date)",
            "kr.search_result_kind"
          ],
          "Sort Method": "quicksort",
          "Sort Space Used": 25,
          "Sort Space Type": "Memory",
          "Plans": [
            {
              "Node Type": "Aggregate",
              "Strategy": "Sorted",
              "Partial Mode": "Simple",
              "Parent Relationship": "Outer",
              "Parallel Aware": false,
              "Startup Cost": 4983963.5,
              "Total Cost": 4983963.54,
              "Plan Rows": 1,
              "Plan Width": 72,
              "Actual Startup Time": 411341.463,
              "Actual Total Time": 411341.463,
              "Actual Rows": 0,
              "Actual Loops": 1,
              "Output": [
                "kr.offer_id",
                "kr.keyword_id",
                "kr.page_number",
                "count(*)",
                "avg(kr.ranking)",
                "o.brand",
                "((timezone('Europe/Paris'::text, kr.datetime))::date)",
                "kr.search_result_kind"
              ],
              "Group Key": [
                "kr.offer_id",
                "kr.keyword_id",
                "kr.search_result_kind",
                "kr.page_number",
                "o.brand",
                "((timezone('Europe/Paris'::text, kr.datetime))::date)"
              ],
              "Plans": [
                {
                  "Node Type": "Sort",
                  "Parent Relationship": "Outer",
                  "Parallel Aware": false,
                  "Startup Cost": 4983963.5,
                  "Total Cost": 4983963.51,
                  "Plan Rows": 1,
                  "Plan Width": 36,
                  "Actual Startup Time": 411341.461,
                  "Actual Total Time": 411341.461,
                  "Actual Rows": 0,
                  "Actual Loops": 1,
                  "Output": [
                    "kr.offer_id",
                    "kr.keyword_id",
                    "kr.page_number",
                    "o.brand",
                    "((timezone('Europe/Paris'::text, kr.datetime))::date)",
                    "kr.search_result_kind",
                    "kr.ranking"
                  ],
                  "Sort Key": [
                    "kr.offer_id",
                    "kr.keyword_id",
                    "kr.search_result_kind",
                    "kr.page_number",
                    "o.brand",
                    "((timezone('Europe/Paris'::text, kr.datetime))::date)"
                  ],
                  "Sort Method": "quicksort",
                  "Sort Space Used": 25,
                  "Sort Space Type": "Memory",
                  "Plans": [
                    {
                      "Node Type": "Nested Loop",
                      "Parent Relationship": "Outer",
                      "Parallel Aware": false,
                      "Join Type": "Inner",
                      "Startup Cost": 1.85,
                      "Total Cost": 4983963.49,
                      "Plan Rows": 1,
                      "Plan Width": 36,
                      "Actual Startup Time": 411341.455,
                      "Actual Total Time": 411341.455,
                      "Actual Rows": 0,
                      "Actual Loops": 1,
                      "Output": [
                        "kr.offer_id",
                        "kr.keyword_id",
                        "kr.page_number",
                        "o.brand",
                        "(timezone('Europe/Paris'::text, kr.datetime))::date",
                        "kr.search_result_kind",
                        "kr.ranking"
                      ],
                      "Inner Unique": true,
                      "Plans": [
                        {
                          "Node Type": "Nested Loop",
                          "Parent Relationship": "Outer",
                          "Parallel Aware": false,
                          "Join Type": "Inner",
                          "Startup Cost": 1.42,
                          "Total Cost": 4983960.83,
                          "Plan Rows": 1,
                          "Plan Width": 31,
                          "Actual Startup Time": 411341.455,
                          "Actual Total Time": 411341.455,
                          "Actual Rows": 0,
                          "Actual Loops": 1,
                          "Output": [
                            "kr.offer_id",
                            "kr.keyword_id",
                            "kr.page_number",
                            "kr.ranking",
                            "kr.datetime",
                            "kr.search_result_kind"
                          ],
                          "Inner Unique": true,
                          "Join Filter": "(ks.account_id = account_account.id)",
                          "Rows Removed by Join Filter": 0,
                          "Plans": [
                            {
                              "Node Type": "Nested Loop",
                              "Parent Relationship": "Outer",
                              "Parallel Aware": false,
                              "Join Type": "Inner",
                              "Startup Cost": 1.42,
                              "Total Cost": 4983958.91,
                              "Plan Rows": 1,
                              "Plan Width": 35,
                              "Actual Startup Time": 286863.945,
                              "Actual Total Time": 382461.602,
                              "Actual Rows": 4879993,
                              "Actual Loops": 1,
                              "Output": [
                                "kr.offer_id",
                                "kr.keyword_id",
                                "kr.page_number",
                                "kr.ranking",
                                "kr.datetime",
                                "kr.search_result_kind",
                                "ks.account_id"
                              ],
                              "Inner Unique": false,
                              "Join Filter": "(kr.keyword_id = ks.keyword_id)",
                              "Rows Removed by Join Filter": 0,
                              "Plans": [
                                {
                                  "Node Type": "Nested Loop",
                                  "Parent Relationship": "Outer",
                                  "Parallel Aware": false,
                                  "Join Type": "Inner",
                                  "Startup Cost": 1,
                                  "Total Cost": 4983957.47,
                                  "Plan Rows": 1,
                                  "Plan Width": 35,
                                  "Actual Startup Time": 286841.365,
                                  "Actual Total Time": 373503.135,
                                  "Actual Rows": 4272224,
                                  "Actual Loops": 1,
                                  "Output": [
                                    "kr.offer_id",
                                    "kr.keyword_id",
                                    "kr.page_number",
                                    "kr.ranking",
                                    "kr.datetime",
                                    "kr.search_result_kind",
                                    "k.id"
                                  ],
                                  "Inner Unique": true,
                                  "Plans": [
                                    {
                                      "Node Type": "Index Scan",
                                      "Parent Relationship": "Outer",
                                      "Parallel Aware": false,
                                      "Scan Direction": "Forward",
                                      "Index Name": "keyword_tracker_keywordranking_id_datetime_idx",
                                      "Relation Name": "keyword_tracker_keywordranking",
                                      "Schema": "public",
                                      "Alias": "kr",
                                      "Startup Cost": 0.57,
                                      "Total Cost": 4983954.82,
                                      "Plan Rows": 1,
                                      "Plan Width": 31,
                                      "Actual Startup Time": 286821.372,
                                      "Actual Total Time": 325753.923,
                                      "Actual Rows": 4272224,
                                      "Actual Loops": 1,
                                      "Output": [
                                        "kr.id",
                                        "kr.page_url",
                                        "kr.marketplace",
                                        "kr.page_number",
                                        "kr.ranking",
                                        "kr.product_title",
                                        "kr.brand",
                                        "kr.image_url",
                                        "kr.nb_comments",
                                        "kr.prime",
                                        "kr.rating",
                                        "kr.keyword_id",
                                        "kr.offer_id",
                                        "kr.asin",
                                        "kr.nb_results",
                                        "kr.parent_offer_id",
                                        "kr.amazon_choice_id",
                                        "kr.bestseller_id",
                                        "kr.price",
                                        "kr.is_add_on",
                                        "kr.datetime",
                                        "kr.search_result_kind",
                                        "kr.created",
                                        "kr.modified"
                                      ],
                                      "Index Cond": "((kr.datetime >= '2019-08-21 00:00:00+00'::timestamp with time zone) AND (kr.datetime < '2019-08-22 00:00:00+00'::timestamp with time zone))",
                                      "Rows Removed by Index Recheck": 0
                                    },
                                    {
                                      "Node Type": "Index Only Scan",
                                      "Parent Relationship": "Inner",
                                      "Parallel Aware": false,
                                      "Scan Direction": "Forward",
                                      "Index Name": "keyword_tracker_keyword_pkey",
                                      "Relation Name": "keyword_tracker_keyword",
                                      "Schema": "public",
                                      "Alias": "k",
                                      "Startup Cost": 0.43,
                                      "Total Cost": 2.65,
                                      "Plan Rows": 1,
                                      "Plan Width": 4,
                                      "Actual Startup Time": 0.011,
                                      "Actual Total Time": 0.011,
                                      "Actual Rows": 1,
                                      "Actual Loops": 4272224,
                                      "Output": [
                                        "k.id"
                                      ],
                                      "Index Cond": "(k.id = kr.keyword_id)",
                                      "Rows Removed by Index Recheck": 0,
                                      "Heap Fetches": 27999076
                                    }
                                  ]
                                },
                                {
                                  "Node Type": "Index Only Scan",
                                  "Parent Relationship": "Inner",
                                  "Parallel Aware": false,
                                  "Scan Direction": "Forward",
                                  "Index Name": "keyword_tracker_keywords_keyword_id_account_id_2d8e95e8_uniq",
                                  "Relation Name": "keyword_tracker_keywordsettings",
                                  "Schema": "public",
                                  "Alias": "ks",
                                  "Startup Cost": 0.42,
                                  "Total Cost": 1.43,
                                  "Plan Rows": 1,
                                  "Plan Width": 8,
                                  "Actual Startup Time": 0.002,
                                  "Actual Total Time": 0.002,
                                  "Actual Rows": 1,
                                  "Actual Loops": 4272224,
                                  "Output": [
                                    "ks.keyword_id",
                                    "ks.account_id"
                                  ],
                                  "Index Cond": "(ks.keyword_id = k.id)",
                                  "Rows Removed by Index Recheck": 0,
                                  "Heap Fetches": 742872
                                }
                              ]
                            },
                            {
                              "Node Type": "Seq Scan",
                              "Parent Relationship": "Inner",
                              "Parallel Aware": false,
                              "Relation Name": "account_account",
                              "Schema": "public",
                              "Alias": "account_account",
                              "Startup Cost": 0,
                              "Total Cost": 1.91,
                              "Plan Rows": 1,
                              "Plan Width": 4,
                              "Actual Startup Time": 0.005,
                              "Actual Total Time": 0.005,
                              "Actual Rows": 0,
                              "Actual Loops": 4879993,
                              "Output": [
                                "account_account.id",
                                "account_account.name",
                                "account_account.client",
                                "account_account.created_on",
                                "account_account.scraping_frequency",
                                "account_account.created",
                                "account_account.modified"
                              ],
                              "Filter": "((account_account.client)::text = 'client'::text)",
                              "Rows Removed by Filter": 75
                            }
                          ]
                        },
                        {
                          "Node Type": "Index Scan",
                          "Parent Relationship": "Inner",
                          "Parallel Aware": false,
                          "Scan Direction": "Forward",
                          "Index Name": "offers_boooffer_pkey",
                          "Relation Name": "offers_boooffer",
                          "Schema": "public",
                          "Alias": "o",
                          "Startup Cost": 0.44,
                          "Total Cost": 2.66,
                          "Plan Rows": 1,
                          "Plan Width": 13,
                          "Actual Startup Time": 0,
                          "Actual Total Time": 0,
                          "Actual Rows": 0,
                          "Actual Loops": 0,
                          "Output": [
                            "o.brand",
                            "o.id"
                          ],
                          "Index Cond": "(o.id = kr.offer_id)",
                          "Rows Removed by Index Recheck": 0
                        }
                      ]
                    }
                  ]
                }
              ]
            }
          ]
        }
      ]
    },
    "Triggers": []
  }
]

这个索引应该有帮助:

CREATE INDEX ON keyword_tracker_keywordranking (id, datetime);

您在 datetime 上的条件很奇怪:它只选择记录 WHERE datetime = '2019-08-21 00:00:00'

估计值似乎有很大偏差,请通过

改进它们
ANALYZE keyword_tracker_keywordranking

从我的头顶来看 - kr 似乎是最重的,所以我会预先过滤它:

您可以做的其他步骤是预先 select - 因此对于连接语句中的相关表 - 只对您真正使用的列进行嵌套查询 selecting (在宽表的情况下,您只使用几列)。

SELECT DISTINCT
    "kr"."offer_id",
    "kr"."keyword_id",
    "kr"."page_number",
    COUNT(*) AS "tally",
    AVG("kr"."ranking") AS "avg_rank",
    "o"."brand" AS "brand",
    ("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date AS "date",
    "kr"."search_result_kind" AS "kind"
FROM (select * from "keyword_tracker_keywordranking" where "datetime" = '2019-08-21') "kr"
INNER JOIN "keyword_tracker_keyword" "k" ON ("kr"."keyword_id" = "k"."id")
INNER JOIN "keyword_tracker_keywordsettings" "ks" ON ("k"."id" = "ks"."keyword_id")
INNER JOIN (select * from "account_account" where "client" = 'client') "account_account" ON ("ks"."account_id" = "account_account"."id")
INNER JOIN "offers_boooffer" "o" ON ("kr"."offer_id" = "o"."id")
GROUP BY
    "kr"."offer_id",
    "kr"."keyword_id",
    "kr"."search_result_kind",
    "kr"."page_number",
    "o"."brand",
    ("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date