在连接时查询大型数据集(超过 1500 万行)
Querying Large Dataset on Join (15+ million rows)
我正在尝试连接两个表:products 和 products_markets。products 不到一百万条记录,而 products_markets 接近 2000 万条记录。数据已做过修改,因此下面的建表语句中可能有一两处笔误:
-- Junction table pairing a product with a country code. The UNIQUE key allows
-- each (product_id, country_code_id) pair at most once.
-- NOTE(review): the only secondary index leads with product_id, so no index in
-- this definition starts with country_code_id.
CREATE TABLE `products_markets` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`product_id` int(10) unsigned NOT NULL,
`country_code_id` int(10) unsigned NOT NULL,
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `unique_index` (`product_id`,`country_code_id`)
) ENGINE=InnoDB AUTO_INCREMENT=21052102 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Product table keyed by `id`, with a unique `code` and foreign keys to
-- `departments` and `manufacturers`.
-- `popularity` is the only nullable column and is not covered by any index
-- in this definition.
CREATE TABLE `products` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`manufacturer_id` int(10) unsigned NOT NULL,
`department_id` int(10) unsigned NOT NULL,
`code` varchar(100) COLLATE utf8mb4_unicode_ci NOT NULL,
`popularity` int(11) DEFAULT NULL,
`name` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
`value` bigint(20) unsigned NOT NULL,
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `products_code_unique` (`code`),
KEY `products_department_id_foreign` (`department_id`),
KEY `products_manufacturer_id_foreign` (`manufacturer_id`),
CONSTRAINT `products_department_id_foreign`
FOREIGN KEY (`department_id`) REFERENCES `departments` (`id`),
CONSTRAINT `products_manufacturer_id_foreign`
FOREIGN KEY (`manufacturer_id`) REFERENCES `manufacturers` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=731563 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
我想返回特定国家/地区最受欢迎的 50 个产品,但查询的运行时间大约为 50 秒,这似乎比预期的要长。
我尝试了几个不同的查询但没有成功:
-- Attempt 1: product ids linked to country_code_id 121, ordered by popularity
-- (highest first, ties broken by id), first 50 rows only.
-- The derived table `p` exposes just id and popularity from products.
-- LEFT JOIN keeps products_markets rows with no matching product; for those
-- rows popularity and p.id are NULL.
select `products_markets`.`product_id`
from products_markets
left join
( SELECT products.id, products.popularity
from products
) p ON p.id = products_markets.product_id
where products_markets.country_code_id = 121
order by `popularity` desc, `p`.`id` asc
limit 50
和
-- Attempt 2: full product rows for products that have a products_markets row
-- with country_code_id 121, grouped by (name, manufacturer_id), ordered by
-- popularity descending then id, first 50 rows only.
-- NOTE(review): products.* is selected while grouping only by name and
-- manufacturer_id, so the remaining columns are neither grouped nor aggregated;
-- presumably this ran with ONLY_FULL_GROUP_BY disabled -- confirm.
select `products`.*
from `products`
where products.id in (
SELECT product_id
from products_markets
where products_markets.country_code_id = 121
)
group by `products`.`name`, `products`.`manufacturer_id`
order by `popularity` desc, `products`.`id` asc
limit 50
这个查询的 EXPLAIN 结果是:
id select_type table type possible_keys key key_len refs rows extra
1 PRIMARY products ALL PRIMARY NULL NULL NULL 623848 Using temporary; Using filesort
1 PRIMARY products_markets ref unique_index unique_index 4 main.products.id 14 Using where; Using index; FirstMatch(products)
我倾向的一个方案是把 products_markets 按国家/地区拆分成单独的表,以减少查询的数据量。我也试过给服务器增加内存,但没有效果。有人能看出数据库设计或查询中有什么明显的问题吗?
还有哪些其他办法可以把这个查询的耗时缩短到目前约 50 秒的一小部分?
试试下面这个查询。它的思路是:先从 products_markets 表中 select 出指定国家的所有产品,然后再从 products 表中按人气排序 select 产品,并限制为 50 条。尽量不要使用 products.*,只 select 确实需要的字段。
-- Up to 50 products linked to country_code_id 121, most popular first
-- (ties broken by id).
-- Filters products_markets by country, then joins each match to its
-- products row on products.id.
-- NOTE: selecting products.* while grouping only by name and manufacturer_id
-- is rejected when ONLY_FULL_GROUP_BY is enabled; list just the columns you
-- actually need where possible.
select products_markets.product_id, products_markets.country_code_id,
products.*
from products_markets
inner join products on products.id = products_markets.product_id
where products_markets.country_code_id = 121
group by `products`.`name`, `products`.`manufacturer_id`
order by `products`.`popularity` desc, `products`.`id` asc
limit 50
删除 products_markets 中的 id 列,并添加
PRIMARY KEY(country_code_id, product_id)
然后删除 UNIQUE 键,除非其他查询需要它。
这将显著缩小那张大表的磁盘占用空间,从而有可能加快所有涉及它的查询。
这也会对 Hamaza 建议的改写查询有所帮助。
我正在尝试连接两个表:products 和 products_markets。products 不到一百万条记录,而 products_markets 接近 2000 万条记录。数据已做过修改,因此下面的建表语句中可能有一两处笔误:
-- Junction table pairing a product with a country code. The UNIQUE key allows
-- each (product_id, country_code_id) pair at most once.
-- NOTE(review): the only secondary index leads with product_id, so no index in
-- this definition starts with country_code_id.
CREATE TABLE `products_markets` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`product_id` int(10) unsigned NOT NULL,
`country_code_id` int(10) unsigned NOT NULL,
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `unique_index` (`product_id`,`country_code_id`)
) ENGINE=InnoDB AUTO_INCREMENT=21052102 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Product table keyed by `id`, with a unique `code` and foreign keys to
-- `departments` and `manufacturers`.
-- `popularity` is the only nullable column and is not covered by any index
-- in this definition.
CREATE TABLE `products` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`manufacturer_id` int(10) unsigned NOT NULL,
`department_id` int(10) unsigned NOT NULL,
`code` varchar(100) COLLATE utf8mb4_unicode_ci NOT NULL,
`popularity` int(11) DEFAULT NULL,
`name` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
`value` bigint(20) unsigned NOT NULL,
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `products_code_unique` (`code`),
KEY `products_department_id_foreign` (`department_id`),
KEY `products_manufacturer_id_foreign` (`manufacturer_id`),
CONSTRAINT `products_department_id_foreign`
FOREIGN KEY (`department_id`) REFERENCES `departments` (`id`),
CONSTRAINT `products_manufacturer_id_foreign`
FOREIGN KEY (`manufacturer_id`) REFERENCES `manufacturers` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=731563 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
我想返回特定国家/地区最受欢迎的 50 个产品,但查询的运行时间大约为 50 秒,这似乎比预期的要长。
我尝试了几个不同的查询但没有成功:
-- Attempt 1: product ids linked to country_code_id 121, ordered by popularity
-- (highest first, ties broken by id), first 50 rows only.
-- The derived table `p` exposes just id and popularity from products.
-- LEFT JOIN keeps products_markets rows with no matching product; for those
-- rows popularity and p.id are NULL.
select `products_markets`.`product_id`
from products_markets
left join
( SELECT products.id, products.popularity
from products
) p ON p.id = products_markets.product_id
where products_markets.country_code_id = 121
order by `popularity` desc, `p`.`id` asc
limit 50
和
-- Attempt 2: full product rows for products that have a products_markets row
-- with country_code_id 121, grouped by (name, manufacturer_id), ordered by
-- popularity descending then id, first 50 rows only.
-- NOTE(review): products.* is selected while grouping only by name and
-- manufacturer_id, so the remaining columns are neither grouped nor aggregated;
-- presumably this ran with ONLY_FULL_GROUP_BY disabled -- confirm.
select `products`.*
from `products`
where products.id in (
SELECT product_id
from products_markets
where products_markets.country_code_id = 121
)
group by `products`.`name`, `products`.`manufacturer_id`
order by `popularity` desc, `products`.`id` asc
limit 50
这个查询的 EXPLAIN 结果是:
id select_type table type possible_keys key key_len refs rows extra
1 PRIMARY products ALL PRIMARY NULL NULL NULL 623848 Using temporary; Using filesort
1 PRIMARY products_markets ref unique_index unique_index 4 main.products.id 14 Using where; Using index; FirstMatch(products)
我倾向的一个方案是把 products_markets 按国家/地区拆分成单独的表,以减少查询的数据量。我也试过给服务器增加内存,但没有效果。有人能看出数据库设计或查询中有什么明显的问题吗?
还有哪些其他办法可以把这个查询的耗时缩短到目前约 50 秒的一小部分?
试试下面这个查询。它的思路是:先从 products_markets 表中 select 出指定国家的所有产品,然后再从 products 表中按人气排序 select 产品,并限制为 50 条。尽量不要使用 products.*,只 select 确实需要的字段。
-- Up to 50 products linked to country_code_id 121, most popular first
-- (ties broken by id).
-- Filters products_markets by country, then joins each match to its
-- products row on products.id.
-- NOTE: selecting products.* while grouping only by name and manufacturer_id
-- is rejected when ONLY_FULL_GROUP_BY is enabled; list just the columns you
-- actually need where possible.
select products_markets.product_id, products_markets.country_code_id,
products.*
from products_markets
inner join products on products.id = products_markets.product_id
where products_markets.country_code_id = 121
group by `products`.`name`, `products`.`manufacturer_id`
order by `products`.`popularity` desc, `products`.`id` asc
limit 50
删除 products_markets 中的 id 列,并添加
PRIMARY KEY(country_code_id, product_id)
然后删除 UNIQUE 键,除非其他查询需要它。
这将显著缩小那张大表的磁盘占用空间,从而有可能加快所有涉及它的查询。
这也会对 Hamaza 建议的改写查询有所帮助。