如何优化执行时间过长的存储过程?
How to optimize a stored procedure that takes too long to execute?
我写了一个存储过程来生成随机短信 records/events。
插入120万行时,查询耗时数百分钟
-- Generate 1.2 million random SMS rows.
EXECUTE insert_random_sms @number_of_records = 1200000;
我以 'procedural' 的方式对存储过程进行了编码。但是,据我所知,SQL 在这方面效率不高。
-- Row-by-row generator of random SMS records.
-- Inserts @number_of_records rows into tbl_sms, one INSERT per loop iteration.
-- Each iteration picks a sender and a destination by sorting tbl_phone_agenda
-- on NEWID() and taking the first row.
CREATE PROCEDURE insert_random_sms
    @number_of_records INT
AS
BEGIN
    DECLARE @rows_done   INT = 0;   -- number of rows inserted so far
    DECLARE @sender_no   INT;
    DECLARE @receiver_no INT;
    DECLARE @fee         INT;
    DECLARE @plan_no     INT;

    WHILE @rows_done < @number_of_records
    BEGIN
        -- RAND() * 100 is a float; the implicit conversion to INT drops the fraction.
        SET @fee = RAND() * 100;
        -- Rounded to the nearest whole number, so the plan is 0 through 5.
        SET @plan_no = ROUND(RAND() * 5, 0);

        -- Random sender: shuffle the whole agenda and keep the first row.
        SELECT TOP (1) @sender_no = phone_no
        FROM tbl_phone_agenda
        ORDER BY NEWID();

        -- Random destination, chosen the same way and independently of the sender.
        SELECT TOP (1) @receiver_no = phone_no
        FROM tbl_phone_agenda
        ORDER BY NEWID();

        INSERT INTO tbl_sms (phone_id, dest_id, charge, tarrif_plan)
        VALUES (@sender_no, @receiver_no, @fee, CONVERT(NVARCHAR(50), @plan_no));

        SET @rows_done = @rows_done + 1;
    END
END
GO
如何优化这个存储过程?
我喜欢用堆叠 CTE 的方法来生成 x 条记录(我是在 Aaron Bertrand 的这篇文章中读到的,他将堆叠 CTE 方法归功于 Itzik Ben-Gan):
-- Stacked CTEs: start from 10 constant rows and square the row count at each
-- level by cross joining the previous level with itself.
WITH N1 (N) AS      -- 10 rows
(
    SELECT 1
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS seed (Number)
),
N2 (N) AS           -- 10 x 10 = 100 rows
(
    SELECT 1
    FROM N1 AS a
    CROSS JOIN N1 AS b
),
N3 (N) AS           -- 100 x 100 = 10,000 rows
(
    SELECT 1
    FROM N2 AS a
    CROSS JOIN N2 AS b
),
N4 (N) AS           -- 10,000 x 10,000 = 100,000,000 rows
(
    SELECT 1
    FROM N3 AS a
    CROSS JOIN N3 AS b
)
SELECT COUNT(*)
FROM N4;
这只是从 10 行开始,不断进行交叉连接,直到(在上面的例子中)得到 100,000,000 行。这就是我生成行的方式。
当您使用基于集合的方法时,就不能再单独使用 RAND(),因为它是运行时常量(在一条查询中只会求值一次)。
为了让所需的每一行都重新求值,需要将 RAND() 与每行都唯一的 NEWID() 结合起来,
因此以下语句会为每一行生成一个介于 0 和 100 之间的不同随机数:
-- NEWID() differs for every row; its checksum seeds RAND() so each row gets its own value.
SELECT CONVERT(INT, ROUND(RAND(CHECKSUM(NEWID())) * 100, 0));
接下来我要做的是将所有电话号码放入一个临时表中,使它们具有连续的 ID(稍后用于随机分配):
-- Staging copy of the phone numbers. The IDENTITY column numbers the rows so
-- that a phone number can later be looked up by a randomly generated ID.
CREATE TABLE #Phone
(
    ID      INT IDENTITY NOT NULL PRIMARY KEY,
    PhoneNo VARCHAR(50)  NOT NULL
);

-- The source column in tbl_phone_agenda is phone_no (as used by the original
-- procedure); PhoneNo is only the name of the temp table's column.
INSERT INTO #Phone (PhoneNo)
SELECT phone_no
FROM tbl_phone_agenda;
因此您的最终查询将是
-- Set-based generator of random SMS records.
-- Inserts @number_of_records rows into tbl_sms with a single INSERT ... SELECT
-- instead of one INSERT per row.
--   @number_of_records: number of rows to insert. The row source (N4) provides
--                       100,000,000 rows, which is the upper limit.
CREATE PROC insert_random_sms @number_of_records INT
AS
BEGIN
    -- Copy the phone numbers into a temp table so that each one gets an
    -- IDENTITY-generated ID that can be matched against a random number.
    CREATE TABLE #Phone
    (
        ID      INT IDENTITY NOT NULL PRIMARY KEY,
        PhoneNo VARCHAR(50)  NOT NULL
    );

    -- The source column in tbl_phone_agenda is phone_no.
    INSERT INTO #Phone (PhoneNo)
    SELECT phone_no
    FROM tbl_phone_agenda;

    -- Needed so the random IDs generated below fall in the range 1..@PhoneCount.
    DECLARE @PhoneCount INT = (SELECT COUNT(*) FROM #Phone);

    -- Stacked CTEs: 10 -> 100 -> 10,000 -> 100,000,000 constant rows.
    WITH N1 (N) AS
    (
        SELECT 1
        FROM (VALUES
                (1), (1), (1), (1), (1),
                (1), (1), (1), (1), (1)
             ) n (Number)
    ),
    N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
    N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
    N4 (N) AS (SELECT 1 FROM N3 AS N1 CROSS JOIN N3 AS N2)
    INSERT INTO tbl_sms (phone_id, dest_id, charge, tarrif_plan)
    SELECT TOP (@number_of_records)
        p.PhoneNo,
        d.PhoneNo,
        -- RAND() alone is evaluated once per query; seeding it with
        -- CHECKSUM(NEWID()) yields a different value for every row.
        Charge      = CAST(ROUND(RAND(CHECKSUM(NEWID())) * 100, 0) AS INT),
        tarrif_plan = CAST(ROUND(RAND(CHECKSUM(NEWID())) * 5, 0) AS INT)
    FROM N4
    -- Sender: join each generated row to a phone chosen by random ID.
    INNER JOIN #Phone p
        ON p.ID = CAST(CEILING(RAND(CHECKSUM(NEWID())) * @PhoneCount) AS INT)
    -- Destination: chosen the same way, independently of the sender.
    INNER JOIN #Phone d
        ON d.ID = CAST(CEILING(RAND(CHECKSUM(NEWID())) * @PhoneCount) AS INT);
END
在我的测试中,该过程大约 20–30 秒即可运行完成:生成 120 万条记录,并从 100,000 个电话号码中进行查找。
通过对从现有表 tbl_phone_agenda 中获取随机 phone_no 的方式稍作改动,
我在大约 50 秒内完成了 120 万条记录的插入。
毫无疑问,GarethD 的解决方案是最快的。
-- Stored procedure that inserts random records into the sms table (loop version).
-- Inserts @number_of_records rows into tbl_sms, one INSERT per iteration.
-- The sender and destination are picked with a BINARY_CHECKSUM(*) * RAND()
-- filter rather than by sorting the whole table on NEWID().
create proc insert_random_sms @number_of_records int
as
begin
    declare @cnt int = 0; -- loop counter
    declare @phone_id int;
    declare @dest_id int;

    while (@cnt < @number_of_records)
    begin
        -- rand() * 100 is a float; the implicit conversion to int drops the fraction.
        declare @charge int = rand() * 100;
        -- rounded to the nearest whole number, so the plan is 0 through 5.
        declare @tarrif_plan int = round(rand() * 5, 0);

        -- here come the changes
        -- The filter keeps rows whose checksum-derived bucket (0-99) is below 10;
        -- top 1 then takes the first qualifying row.
        -- NOTE(review): there is no order by, so which qualifying row is returned
        -- is up to the engine, and if no row qualifies the variable keeps its
        -- previous value.
        select top 1 @phone_id = phone_no
        from tbl_phone_agenda
        where (abs(cast((binary_checksum(*) * rand()) as int)) % 100) < 10;

        select top 1 @dest_id = phone_no
        from tbl_phone_agenda
        where (abs(cast((binary_checksum(*) * rand()) as int)) % 100) < 10;

        -- Column name spelled tarrif_plan, matching the other versions of this
        -- procedure that insert into tbl_sms.
        insert into tbl_sms (phone_id, dest_id, charge, tarrif_plan)
        values (@phone_id, @dest_id, @charge, convert(nvarchar(50), @tarrif_plan));

        set @cnt += 1;
    end
end
go
我的解决方案的灵感可以在这里找到:MSDN article - Selecting Rows Randomly from a Large Table
我写了一个存储过程来生成随机短信 records/events。
插入120万行时,查询耗时数百分钟
-- Generate 1.2 million random SMS rows.
EXECUTE insert_random_sms @number_of_records = 1200000;
我以 'procedural' 的方式对存储过程进行了编码。但是,据我所知,SQL 在这方面效率不高。
-- Row-by-row generator of random SMS records.
-- Inserts @number_of_records rows into tbl_sms, one INSERT per loop iteration.
-- Each iteration picks a sender and a destination by sorting tbl_phone_agenda
-- on NEWID() and taking the first row.
CREATE PROCEDURE insert_random_sms
    @number_of_records INT
AS
BEGIN
    DECLARE @rows_done   INT = 0;   -- number of rows inserted so far
    DECLARE @sender_no   INT;
    DECLARE @receiver_no INT;
    DECLARE @fee         INT;
    DECLARE @plan_no     INT;

    WHILE @rows_done < @number_of_records
    BEGIN
        -- RAND() * 100 is a float; the implicit conversion to INT drops the fraction.
        SET @fee = RAND() * 100;
        -- Rounded to the nearest whole number, so the plan is 0 through 5.
        SET @plan_no = ROUND(RAND() * 5, 0);

        -- Random sender: shuffle the whole agenda and keep the first row.
        SELECT TOP (1) @sender_no = phone_no
        FROM tbl_phone_agenda
        ORDER BY NEWID();

        -- Random destination, chosen the same way and independently of the sender.
        SELECT TOP (1) @receiver_no = phone_no
        FROM tbl_phone_agenda
        ORDER BY NEWID();

        INSERT INTO tbl_sms (phone_id, dest_id, charge, tarrif_plan)
        VALUES (@sender_no, @receiver_no, @fee, CONVERT(NVARCHAR(50), @plan_no));

        SET @rows_done = @rows_done + 1;
    END
END
GO
如何优化这个存储过程?
我喜欢用堆叠 CTE 的方法来生成 x 条记录(我是在 Aaron Bertrand 的这篇文章中读到的,他将堆叠 CTE 方法归功于 Itzik Ben-Gan):
-- Stacked CTEs: start from 10 constant rows and square the row count at each
-- level by cross joining the previous level with itself.
WITH N1 (N) AS      -- 10 rows
(
    SELECT 1
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS seed (Number)
),
N2 (N) AS           -- 10 x 10 = 100 rows
(
    SELECT 1
    FROM N1 AS a
    CROSS JOIN N1 AS b
),
N3 (N) AS           -- 100 x 100 = 10,000 rows
(
    SELECT 1
    FROM N2 AS a
    CROSS JOIN N2 AS b
),
N4 (N) AS           -- 10,000 x 10,000 = 100,000,000 rows
(
    SELECT 1
    FROM N3 AS a
    CROSS JOIN N3 AS b
)
SELECT COUNT(*)
FROM N4;
这只是从 10 行开始,不断进行交叉连接,直到(在上面的例子中)得到 100,000,000 行。这就是我生成行的方式。
当您使用基于集合的方法时,就不能再单独使用 RAND(),因为它是运行时常量(在一条查询中只会求值一次)。
为了让所需的每一行都重新求值,需要将 RAND() 与每行都唯一的 NEWID() 结合起来,
因此以下语句会为每一行生成一个介于 0 和 100 之间的不同随机数:
-- NEWID() differs for every row; its checksum seeds RAND() so each row gets its own value.
SELECT CONVERT(INT, ROUND(RAND(CHECKSUM(NEWID())) * 100, 0));
接下来我要做的是将所有电话号码放入一个临时表中,使它们具有连续的 ID(稍后用于随机分配):
-- Staging copy of the phone numbers. The IDENTITY column numbers the rows so
-- that a phone number can later be looked up by a randomly generated ID.
CREATE TABLE #Phone
(
    ID      INT IDENTITY NOT NULL PRIMARY KEY,
    PhoneNo VARCHAR(50)  NOT NULL
);

-- The source column in tbl_phone_agenda is phone_no (as used by the original
-- procedure); PhoneNo is only the name of the temp table's column.
INSERT INTO #Phone (PhoneNo)
SELECT phone_no
FROM tbl_phone_agenda;
因此您的最终查询将是
-- Set-based generator of random SMS records.
-- Inserts @number_of_records rows into tbl_sms with a single INSERT ... SELECT
-- instead of one INSERT per row.
--   @number_of_records: number of rows to insert. The row source (N4) provides
--                       100,000,000 rows, which is the upper limit.
CREATE PROC insert_random_sms @number_of_records INT
AS
BEGIN
    -- Copy the phone numbers into a temp table so that each one gets an
    -- IDENTITY-generated ID that can be matched against a random number.
    CREATE TABLE #Phone
    (
        ID      INT IDENTITY NOT NULL PRIMARY KEY,
        PhoneNo VARCHAR(50)  NOT NULL
    );

    -- The source column in tbl_phone_agenda is phone_no.
    INSERT INTO #Phone (PhoneNo)
    SELECT phone_no
    FROM tbl_phone_agenda;

    -- Needed so the random IDs generated below fall in the range 1..@PhoneCount.
    DECLARE @PhoneCount INT = (SELECT COUNT(*) FROM #Phone);

    -- Stacked CTEs: 10 -> 100 -> 10,000 -> 100,000,000 constant rows.
    WITH N1 (N) AS
    (
        SELECT 1
        FROM (VALUES
                (1), (1), (1), (1), (1),
                (1), (1), (1), (1), (1)
             ) n (Number)
    ),
    N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
    N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
    N4 (N) AS (SELECT 1 FROM N3 AS N1 CROSS JOIN N3 AS N2)
    INSERT INTO tbl_sms (phone_id, dest_id, charge, tarrif_plan)
    SELECT TOP (@number_of_records)
        p.PhoneNo,
        d.PhoneNo,
        -- RAND() alone is evaluated once per query; seeding it with
        -- CHECKSUM(NEWID()) yields a different value for every row.
        Charge      = CAST(ROUND(RAND(CHECKSUM(NEWID())) * 100, 0) AS INT),
        tarrif_plan = CAST(ROUND(RAND(CHECKSUM(NEWID())) * 5, 0) AS INT)
    FROM N4
    -- Sender: join each generated row to a phone chosen by random ID.
    INNER JOIN #Phone p
        ON p.ID = CAST(CEILING(RAND(CHECKSUM(NEWID())) * @PhoneCount) AS INT)
    -- Destination: chosen the same way, independently of the sender.
    INNER JOIN #Phone d
        ON d.ID = CAST(CEILING(RAND(CHECKSUM(NEWID())) * @PhoneCount) AS INT);
END
在我的测试中,该过程大约 20–30 秒即可运行完成:生成 120 万条记录,并从 100,000 个电话号码中进行查找。
通过对从现有表 tbl_phone_agenda 中获取随机 phone_no 的方式稍作改动,
我在大约 50 秒内完成了 120 万条记录的插入。
毫无疑问,GarethD 的解决方案是最快的。
-- Stored procedure that inserts random records into the sms table (loop version).
-- Inserts @number_of_records rows into tbl_sms, one INSERT per iteration.
-- The sender and destination are picked with a BINARY_CHECKSUM(*) * RAND()
-- filter rather than by sorting the whole table on NEWID().
create proc insert_random_sms @number_of_records int
as
begin
    declare @cnt int = 0; -- loop counter
    declare @phone_id int;
    declare @dest_id int;

    while (@cnt < @number_of_records)
    begin
        -- rand() * 100 is a float; the implicit conversion to int drops the fraction.
        declare @charge int = rand() * 100;
        -- rounded to the nearest whole number, so the plan is 0 through 5.
        declare @tarrif_plan int = round(rand() * 5, 0);

        -- here come the changes
        -- The filter keeps rows whose checksum-derived bucket (0-99) is below 10;
        -- top 1 then takes the first qualifying row.
        -- NOTE(review): there is no order by, so which qualifying row is returned
        -- is up to the engine, and if no row qualifies the variable keeps its
        -- previous value.
        select top 1 @phone_id = phone_no
        from tbl_phone_agenda
        where (abs(cast((binary_checksum(*) * rand()) as int)) % 100) < 10;

        select top 1 @dest_id = phone_no
        from tbl_phone_agenda
        where (abs(cast((binary_checksum(*) * rand()) as int)) % 100) < 10;

        -- Column name spelled tarrif_plan, matching the other versions of this
        -- procedure that insert into tbl_sms.
        insert into tbl_sms (phone_id, dest_id, charge, tarrif_plan)
        values (@phone_id, @dest_id, @charge, convert(nvarchar(50), @tarrif_plan));

        set @cnt += 1;
    end
end
go
我的解决方案的灵感可以在这里找到:MSDN article - Selecting Rows Randomly from a Large Table