Mysql 查询优化
Mysql Query Optimization
我有一个 table,里面有将近 700 万行。这是 table 结构
-- Sales transactions table queried by the per-section aggregate report.
-- Syntax fixes: no stray backtick before CREATE, and a comma now separates
-- the `idx-saledate-section` key from the key that follows it.
CREATE TABLE `ERS_SALES_TRANSACTIONS` (
  `saleId` int(12) NOT NULL AUTO_INCREMENT,
  `ERS_COMPANY_CODE` int(3) DEFAULT NULL,
  `SALE_SECTION` varchar(128) DEFAULT NULL,
  `SALE_DATE` date DEFAULT NULL,
  `SALE_STOCKAGE_EXACT` int(4) DEFAULT NULL,
  `SALE_NET_AMOUNT` decimal(11,2) DEFAULT NULL,
  `SALE_ABSOLUTE_CDATE` date DEFAULT NULL,
  PRIMARY KEY (`saleId`),
  KEY `index_location` (`ERS_COMPANY_CODE`),
  KEY `idx-erscode-salesec` (`SALE_SECTION`,`ERS_COMPANY_CODE`) USING BTREE,
  KEY `idx-saledate-section` (`SALE_DATE`,`SALE_SECTION`) USING BTREE,
  -- Composite index: equality-filtered column first, then the GROUP BY
  -- column, then columns the report reads. It does not contain
  -- SALE_ABSOLUTE_CDATE, so a query that also reads that column still has
  -- to visit the table rows.
  KEY `idx_quick_sales_transactions` (`ERS_COMPANY_CODE`,`SALE_SECTION`,`SALE_DATE`,`SALE_STOCKAGE_EXACT`,`SALE_NET_AMOUNT`)
) ENGINE=InnoDB;
此查询执行时间超过 7 秒,有什么方法可以加快速度吗?
-- Per-section sales totals for company 48, pivoted into one column per
-- (sale-date period, bucket) pair. A row outside a bucket adds 0 to it.
SELECT
    txn.SALE_SECTION,
    -- Period 1 (2016-01-16 to 2016-04-30), split by SALE_STOCKAGE_EXACT.
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 0 AND 90
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs1_pd1_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 91 AND 180
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs2_pd1_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 181 AND 365
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os1_pd1_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 366 AND 9999
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os2_pd1_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS TOTAL_PD1_SALE,
    -- Period 2 (2016-04-01 to 2016-04-30), split by SALE_STOCKAGE_EXACT.
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 0 AND 90
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs1_pd2_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 91 AND 180
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs2_pd2_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 181 AND 365
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os1_pd2_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 366 AND 9999
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os2_pd2_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS TOTAL_PD2_SALE,
    -- Sales dated 2016-05-01 to 2016-05-31, split by SALE_ABSOLUTE_CDATE range.
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND txn.SALE_ABSOLUTE_CDATE BETWEEN '2016-03-01' AND '2016-05-31'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs1_achived_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND txn.SALE_ABSOLUTE_CDATE BETWEEN '2015-12-01' AND '2016-02-29'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs2_achived_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND txn.SALE_ABSOLUTE_CDATE BETWEEN '2015-06-01' AND '2015-11-30'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os1_achived_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND txn.SALE_ABSOLUTE_CDATE BETWEEN '2006-12-26' AND '2015-05-31'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os2_achived_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS Total_ACHIVED_SALE
FROM ERS_SALES_TRANSACTIONS AS txn
WHERE txn.ERS_COMPANY_CODE = 48
GROUP BY txn.SALE_SECTION
这是该查询的 EXPLAIN 输出
{
"data":
[
{
"id": 1,
"select_type": "SIMPLE",
"table": "A",
"type": "ref",
"possible_keys": "index_location,idx-erscode-salesec,idx-saledate-section",
"key": "index_location",
"key_len": "5",
"ref": "const",
"rows": 1411944,
"Extra": "Using where; Using temporary; Using filesort"
}
]
}
添加复合索引后,时间减少到4.03秒。这是计划
{
"data":
[
{
"id": 1,
"select_type": "SIMPLE",
"table": "A",
"type": "ref",
"possible_keys": "index_location,idx-erscode-salesec,idx-saledate-section,idx_quick_sales_transactions",
"key_len": "5",
"key": "idx_quick_sales_transactions",
"ref": "const",
"rows": 1306058,
"Extra": "Using where"
}
]
}
-- Per-section bucket totals for company 48, one derived table per bucket.
-- Each bucket is aggregated to at most one row per section BEFORE joining:
-- joining raw transaction rows to each other on (company, section) alone
-- would pair every row with every matching row of every other bucket and
-- inflate all of the sums.
SELECT
    sales.SALE_SECTION,
    -- COALESCE so a section with no rows in a bucket reports 0, not NULL.
    COALESCE(fs1_pd1.bucket_total, 0) AS fs1_pd1_sale,
    COALESCE(fs2_pd1.bucket_total, 0) AS fs2_pd1_sale
    -- ... one column per remaining bucket, following the same pattern
FROM (
    -- Driving set: every section of company 48, exactly once.
    SELECT DISTINCT SALE_SECTION
    FROM ERS_SALES_TRANSACTIONS
    WHERE ERS_COMPANY_CODE = 48
) AS sales
LEFT JOIN (
    SELECT SALE_SECTION, SUM(SALE_NET_AMOUNT) AS bucket_total
    FROM ERS_SALES_TRANSACTIONS
    WHERE ERS_COMPANY_CODE = 48
      AND SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
      AND SALE_STOCKAGE_EXACT BETWEEN 0 AND 90
    GROUP BY SALE_SECTION
) AS fs1_pd1
    -- SALE_SECTION is nullable; <=> also matches the NULL section group.
    ON fs1_pd1.SALE_SECTION <=> sales.SALE_SECTION
LEFT JOIN (
    SELECT SALE_SECTION, SUM(SALE_NET_AMOUNT) AS bucket_total
    FROM ERS_SALES_TRANSACTIONS
    WHERE ERS_COMPANY_CODE = 48
      AND SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
      AND SALE_STOCKAGE_EXACT BETWEEN 91 AND 180
    GROUP BY SALE_SECTION
) AS fs2_pd1
    ON fs2_pd1.SALE_SECTION <=> sales.SALE_SECTION
-- ... one LEFT JOIN per remaining bucket, following the same pattern
这样一来,优化器就可以为查询使用多个索引。
不过,我建议首先尝试@Thorsten Kettner 推荐的复合索引,因为这可能会产生相同的效果,而且复杂性要低得多。
我不同意 Jimmy B 的观点。我认为您的查询看起来很完美。
根据公司 48 的记录数量,要么应按顺序读取完整的 table(当数量很多时,例如占 table 全部记录的 50%),要么应使用 ERS_COMPANY_CODE 上的索引(当数量不多时,例如只占全部记录的 1%)。
由于DBMS决定在ERS_COMPANY_CODE上使用索引,所以后者应该是这样。
您可以尝试通过创建组合索引来进一步加快查询速度。该索引至少应包含 (ERS_COMPANY_CODE, SALE_SECTION),以便 GROUP BY 更快。最好把查询用到的所有字段都加进去,这样可以从索引中获取所有数据,而不必再访问 table 本身。
-- Covering index for the per-section aggregate on one company:
-- equality-filtered column first, then the GROUP BY column, then every
-- other column the query reads. SALE_ABSOLUTE_CDATE is included because the
-- *_achived_sale sums filter on it; without it the engine still has to
-- visit the table rows.
CREATE INDEX idx_quick_sales_transactions ON ERS_SALES_TRANSACTIONS
    (ERS_COMPANY_CODE, SALE_SECTION, SALE_DATE, SALE_STOCKAGE_EXACT, SALE_NET_AMOUNT, SALE_ABSOLUTE_CDATE);
不知道有没有办法加快速度。但是,您可以尝试使用索引。我建议在 ERS_SALES_TRANSACTIONS(ERS_COMPANY_CODE, SALE_SECTION, SALE_DATE, SALE_NET_AMOUNT) 上创建一个索引。
这是查询的覆盖索引,意味着用于查询的所有列都在索引中 -- 因此数据库引擎不需要访问原始数据页。
但是,性能仍然取决于与特定公司代码匹配的行数。并且,特别是用于聚合的文件排序的性能。
我有一个 table,里面有将近 700 万行。这是 table 结构
-- Sales transactions table queried by the per-section aggregate report.
-- Syntax fixes: no stray backtick before CREATE, and a comma now separates
-- the `idx-saledate-section` key from the key that follows it.
CREATE TABLE `ERS_SALES_TRANSACTIONS` (
  `saleId` int(12) NOT NULL AUTO_INCREMENT,
  `ERS_COMPANY_CODE` int(3) DEFAULT NULL,
  `SALE_SECTION` varchar(128) DEFAULT NULL,
  `SALE_DATE` date DEFAULT NULL,
  `SALE_STOCKAGE_EXACT` int(4) DEFAULT NULL,
  `SALE_NET_AMOUNT` decimal(11,2) DEFAULT NULL,
  `SALE_ABSOLUTE_CDATE` date DEFAULT NULL,
  PRIMARY KEY (`saleId`),
  KEY `index_location` (`ERS_COMPANY_CODE`),
  KEY `idx-erscode-salesec` (`SALE_SECTION`,`ERS_COMPANY_CODE`) USING BTREE,
  KEY `idx-saledate-section` (`SALE_DATE`,`SALE_SECTION`) USING BTREE,
  -- Composite index: equality-filtered column first, then the GROUP BY
  -- column, then columns the report reads. It does not contain
  -- SALE_ABSOLUTE_CDATE, so a query that also reads that column still has
  -- to visit the table rows.
  KEY `idx_quick_sales_transactions` (`ERS_COMPANY_CODE`,`SALE_SECTION`,`SALE_DATE`,`SALE_STOCKAGE_EXACT`,`SALE_NET_AMOUNT`)
) ENGINE=InnoDB;
此查询执行时间超过 7 秒,有什么方法可以加快速度吗?
-- Per-section sales totals for company 48, pivoted into one column per
-- (sale-date period, bucket) pair. A row outside a bucket adds 0 to it.
SELECT
    txn.SALE_SECTION,
    -- Period 1 (2016-01-16 to 2016-04-30), split by SALE_STOCKAGE_EXACT.
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 0 AND 90
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs1_pd1_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 91 AND 180
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs2_pd1_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 181 AND 365
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os1_pd1_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 366 AND 9999
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os2_pd1_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS TOTAL_PD1_SALE,
    -- Period 2 (2016-04-01 to 2016-04-30), split by SALE_STOCKAGE_EXACT.
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 0 AND 90
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs1_pd2_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 91 AND 180
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs2_pd2_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 181 AND 365
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os1_pd2_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND txn.SALE_STOCKAGE_EXACT BETWEEN 366 AND 9999
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os2_pd2_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS TOTAL_PD2_SALE,
    -- Sales dated 2016-05-01 to 2016-05-31, split by SALE_ABSOLUTE_CDATE range.
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND txn.SALE_ABSOLUTE_CDATE BETWEEN '2016-03-01' AND '2016-05-31'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs1_achived_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND txn.SALE_ABSOLUTE_CDATE BETWEEN '2015-12-01' AND '2016-02-29'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS fs2_achived_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND txn.SALE_ABSOLUTE_CDATE BETWEEN '2015-06-01' AND '2015-11-30'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os1_achived_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND txn.SALE_ABSOLUTE_CDATE BETWEEN '2006-12-26' AND '2015-05-31'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS os2_achived_sale,
    SUM(CASE WHEN txn.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
             THEN txn.SALE_NET_AMOUNT ELSE 0 END) AS Total_ACHIVED_SALE
FROM ERS_SALES_TRANSACTIONS AS txn
WHERE txn.ERS_COMPANY_CODE = 48
GROUP BY txn.SALE_SECTION
这是该查询的 EXPLAIN 输出
{
"data":
[
{
"id": 1,
"select_type": "SIMPLE",
"table": "A",
"type": "ref",
"possible_keys": "index_location,idx-erscode-salesec,idx-saledate-section",
"key": "index_location",
"key_len": "5",
"ref": "const",
"rows": 1411944,
"Extra": "Using where; Using temporary; Using filesort"
}
]
}
添加复合索引后,时间减少到4.03秒。这是计划
{
"data":
[
{
"id": 1,
"select_type": "SIMPLE",
"table": "A",
"type": "ref",
"possible_keys": "index_location,idx-erscode-salesec,idx-saledate-section,idx_quick_sales_transactions",
"key_len": "5",
"key": "idx_quick_sales_transactions",
"ref": "const",
"rows": 1306058,
"Extra": "Using where"
}
]
}
-- Per-section bucket totals for company 48, one derived table per bucket.
-- Each bucket is aggregated to at most one row per section BEFORE joining:
-- joining raw transaction rows to each other on (company, section) alone
-- would pair every row with every matching row of every other bucket and
-- inflate all of the sums.
SELECT
    sales.SALE_SECTION,
    -- COALESCE so a section with no rows in a bucket reports 0, not NULL.
    COALESCE(fs1_pd1.bucket_total, 0) AS fs1_pd1_sale,
    COALESCE(fs2_pd1.bucket_total, 0) AS fs2_pd1_sale
    -- ... one column per remaining bucket, following the same pattern
FROM (
    -- Driving set: every section of company 48, exactly once.
    SELECT DISTINCT SALE_SECTION
    FROM ERS_SALES_TRANSACTIONS
    WHERE ERS_COMPANY_CODE = 48
) AS sales
LEFT JOIN (
    SELECT SALE_SECTION, SUM(SALE_NET_AMOUNT) AS bucket_total
    FROM ERS_SALES_TRANSACTIONS
    WHERE ERS_COMPANY_CODE = 48
      AND SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
      AND SALE_STOCKAGE_EXACT BETWEEN 0 AND 90
    GROUP BY SALE_SECTION
) AS fs1_pd1
    -- SALE_SECTION is nullable; <=> also matches the NULL section group.
    ON fs1_pd1.SALE_SECTION <=> sales.SALE_SECTION
LEFT JOIN (
    SELECT SALE_SECTION, SUM(SALE_NET_AMOUNT) AS bucket_total
    FROM ERS_SALES_TRANSACTIONS
    WHERE ERS_COMPANY_CODE = 48
      AND SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
      AND SALE_STOCKAGE_EXACT BETWEEN 91 AND 180
    GROUP BY SALE_SECTION
) AS fs2_pd1
    ON fs2_pd1.SALE_SECTION <=> sales.SALE_SECTION
-- ... one LEFT JOIN per remaining bucket, following the same pattern
这样一来,优化器就可以为查询使用多个索引。
不过,我建议首先尝试@Thorsten Kettner 推荐的复合索引,因为这可能会产生相同的效果,而且复杂性要低得多。
我不同意 Jimmy B 的观点。我认为您的查询看起来很完美。
根据公司 48 的记录数量,要么应按顺序读取完整的 table(当数量很多时,例如占 table 全部记录的 50%),要么应使用 ERS_COMPANY_CODE 上的索引(当数量不多时,例如只占全部记录的 1%)。
由于DBMS决定在ERS_COMPANY_CODE上使用索引,所以后者应该是这样。
您可以尝试通过创建组合索引来进一步加快查询速度。该索引至少应包含 (ERS_COMPANY_CODE, SALE_SECTION),以便 GROUP BY 更快。最好把查询用到的所有字段都加进去,这样可以从索引中获取所有数据,而不必再访问 table 本身。
-- Covering index for the per-section aggregate on one company:
-- equality-filtered column first, then the GROUP BY column, then every
-- other column the query reads. SALE_ABSOLUTE_CDATE is included because the
-- *_achived_sale sums filter on it; without it the engine still has to
-- visit the table rows.
CREATE INDEX idx_quick_sales_transactions ON ERS_SALES_TRANSACTIONS
    (ERS_COMPANY_CODE, SALE_SECTION, SALE_DATE, SALE_STOCKAGE_EXACT, SALE_NET_AMOUNT, SALE_ABSOLUTE_CDATE);
不知道有没有办法加快速度。但是,您可以尝试使用索引。我建议在 ERS_SALES_TRANSACTIONS(ERS_COMPANY_CODE, SALE_SECTION, SALE_DATE, SALE_NET_AMOUNT) 上创建一个索引。
这是查询的覆盖索引,意味着用于查询的所有列都在索引中 -- 因此数据库引擎不需要访问原始数据页。
但是,性能仍然取决于与特定公司代码匹配的行数。并且,特别是用于聚合的文件排序的性能。