在单个 table 中使用 while 循环的多个 select 查询?可能吗?
Multiple select queries using while loop in a single table? Is it Possible?
我有 2 个 table。 Table A 有日期、ISBN(书籍)、需求(该日期的需求)。 Table B 有日期、ISBN(对于图书)和 SalesRank。
样本数据如下:
DailyBookFile 每个日期都有 150k 条记录,从 2010 年开始(即 150k * 365 天 * 8 年)行。 SalesRank Table 也是如此,每个日期有大约 50 万条记录
DailyBookFile
Date Isbn13 CurrentModifiedDemandTotal
20180122 9780955153075 13
20180122 9780805863567 9
20180122 9781138779396 1
20180122 9780029001516 9
20180122 9780470614150 42
SalesRank
importdate ISBN13 SalesRank
20180122 9780029001516 69499
20180122 9780470614150 52879
20180122 9780805863567 832429
20180122 9780955153075 44528
20180122 9781138779396 926435
Required Output
Date Avg_Rank Book_Group
20180122 385154 Elite
20180121 351545 Elite
20180120 201545 Elite
我想获取每天的Top 200 CurrentModifiedDemand,取平均Rank。
由于我是 SQL 的新手,因此无法找到解决方案。
我从昨天的前 200 个 CurrentModifiedDemand 开始,并获得了去年的平均排名。
-- Average sales rank per file date (dates after 20170101) for a fixed set of
-- ISBNs: up to 200 ISBNs taken from the 20180122 file whose demand exceeds 200.
-- The TOP 200 subquery has no ORDER BY, so which qualifying rows it returns
-- is not defined.
SELECT
    book.Filedate      AS [Date],
    AVG(rnk.SalesRank) AS Avg_Rank,
    'Elite'            AS Book_Group
FROM [ODS].[wholesale].[DailyBookFile] AS book
INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS rnk
    ON  rnk.ISBN13 = book.Isbn13
    AND rnk.importdate = book.FileDate
WHERE book.Filedate > 20170101
  AND book.Isbn13 IN (
        SELECT TOP 200 picked.Isbn13
        FROM [ODS].[wholesale].[DailyBookFile] AS picked
        WHERE picked.FileDate = 20180122
          AND CAST(picked.CurrentModifiedDemandTotal AS int) > 200
      )
GROUP BY book.Filedate;
但是结果不是我想要的。所以,现在我想要每天前 200 个 CurrentModifiedDemand 的 ISBN 及其平均排名。我试过这个。
-- Starts at 20180122 and adds 1 to the integer on every pass, for as long as
-- DailyBookFile has rows for the current value. Each pass runs its own SELECT,
-- so every pass comes back as a separate result set.
DECLARE @file_date int = 20180122;

WHILE (
        SELECT DISTINCT probe.Filedate
        FROM [ODS].[wholesale].[DailyBookFile] AS probe
        WHERE probe.Filedate = @file_date
      ) IS NOT NULL
BEGIN
    -- Average sales rank on @file_date for up to 200 ISBNs whose demand on
    -- that date exceeds 500 (TOP without ORDER BY: the pick is arbitrary).
    SELECT
        book.Filedate      AS [Date],
        AVG(rnk.SalesRank) AS Avg_Rank,
        'Elite'            AS Book_Group
    FROM [ODS].[wholesale].[DailyBookFile] AS book
    INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS rnk
        ON  rnk.ISBN13 = book.Isbn13
        AND rnk.importdate = book.FileDate
    WHERE book.Filedate = @file_date
      AND book.Isbn13 IN (
            SELECT TOP 200 picked.Isbn13
            FROM [ODS].[wholesale].[DailyBookFile] AS picked
            WHERE picked.FileDate = @file_date
              AND CAST(picked.CurrentModifiedDemandTotal AS int) > 500
          )
    GROUP BY book.Filedate;

    SET @file_date += 1;
END
在此,我在每个 window 中得到一个 select 查询结果。有什么方法可以在单个 table 中得到结果?
P.S. 每天前 200 本书的列表会根据 CurrentModifiedDemand 发生变化。我想取这些书当天销售排名的平均值。
您可以在循环的每次迭代中把行插入临时 table(或 table 类型变量),等循环结束后再一次性 select 所有内容,而不是在每次迭代中立即 select:
-- Collects one row per file date into #books so the whole result comes back
-- as a single result set instead of one result set per loop iteration.
IF OBJECT_ID('tempdb..#books') IS NOT NULL
BEGIN
    DROP TABLE #books;
END;

CREATE TABLE #books (
    [Date]       INT,
    [Avg_Rank]   FLOAT,
    [Book_Group] VARCHAR(512)
);

-- Loop cursor: a calendar date stored as a yyyymmdd integer.
DECLARE @i int;
SET @i = 20180122;

BEGIN TRY
    -- Keep going while DailyBookFile has at least one row for the current date.
    -- EXISTS only tests for presence, so no scalar subquery has to be evaluated.
    WHILE EXISTS (SELECT 1
                  FROM [ODS].[wholesale].[DailyBookFile] AS DBF
                  WHERE DBF.Filedate = @i)
    BEGIN
        INSERT INTO #books (
            [Date],
            [Avg_Rank],
            [Book_Group]
        )
        SELECT DBF.Filedate AS [Date],
               AVG(AMA.SalesRank) AS Avg_Rank,
               'Elite' AS Book_Group
        FROM [ODS].[wholesale].[DailyBookFile] AS DBF
        INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS AMA
            ON DBF.Isbn13 = AMA.ISBN13
           AND DBF.FileDate = AMA.importdate
        WHERE DBF.Isbn13 IN (SELECT TOP 200 Isbn13
                             FROM [ODS].[wholesale].[DailyBookFile]
                             WHERE FileDate = @i
                               AND CAST(CurrentModifiedDemandTotal AS int) > 500
                             -- TOP without ORDER BY returns an arbitrary 200 rows;
                             -- ordering by demand makes it the 200 highest-demand ISBNs.
                             ORDER BY CAST(CurrentModifiedDemandTotal AS int) DESC)
          AND DBF.Filedate = @i
        GROUP BY DBF.Filedate;

        -- Advance by one calendar day. Plain "@i + 1" turns 20180131 into
        -- 20180132, which is not a date, so the loop would end at every month end.
        SET @i = CONVERT(int,
                         CONVERT(char(8),
                                 DATEADD(DAY, 1, CONVERT(date, CONVERT(char(8), @i), 112)),
                                 112));
    END;
END TRY
BEGIN CATCH
    IF OBJECT_ID('tempdb..#books') IS NOT NULL
    BEGIN
        DROP TABLE #books;
    END;

    -- Re-raise the original error. Without this the error is swallowed and the
    -- SELECT below fails on the dropped table with an unrelated message.
    THROW;
END CATCH;

SELECT [Date],
       [Avg_Rank],
       [Book_Group]
FROM #books
ORDER BY [Date];

DROP TABLE #books;
使用 table 类型的变量会产生更简单的代码,但是当存储大量数据时 table 类型的变量开始在性能上低于临时 tables。我不确定有多少行是截止的,但根据我的经验,我发现在 10000+ 行计数时将 table 类型的 var 更改为 temp table 会带来显着的性能提升。对于小行数,可能适用相反的情况。
这避免了昂贵的 WHILE 循环,我相信可以实现您的目标:
-- Set-based alternative to the WHILE loop, demonstrated on the sample data:
-- for every date, take the 200 ISBNs with the highest demand and average
-- their sales rank for that same date.
CREATE TABLE #DailyBookFile ([Date] date,
                             Isbn13 bigint,
                             CurrentModifiedDemandTotal tinyint);

INSERT INTO #DailyBookFile ([Date], Isbn13, CurrentModifiedDemandTotal)
VALUES ('20180122', 9780955153075, 13),
       ('20180122', 9780805863567, 9 ),
       ('20180122', 9781138779396, 1 ),
       ('20180122', 9780029001516, 9 ),
       ('20180122', 9780470614150, 42);

CREATE TABLE #SalesRank (importdate date,
                         ISBN13 bigint,
                         SalesRank int);

INSERT INTO #SalesRank (importdate, ISBN13, SalesRank)
VALUES ('20180122', 9780029001516, 69499 ),
       ('20180122', 9780470614150, 52879 ),
       ('20180122', 9780805863567, 832429),
       ('20180122', 9780955153075, 44528 ),
       ('20180122', 9781138779396, 926435);
GO

-- Number each day's books by demand, highest first. ROW_NUMBER (rather than
-- RANK) keeps at most 200 rows per day even when demand values tie; Isbn13 is
-- only a tie-breaker so the pick is repeatable.
WITH DemandRanks AS (
    SELECT DBF.[Date],
           DBF.Isbn13,
           ROW_NUMBER() OVER (PARTITION BY DBF.[Date]
                              ORDER BY DBF.CurrentModifiedDemandTotal DESC,
                                       DBF.Isbn13) AS Ranking
    FROM #DailyBookFile AS DBF
)
SELECT DR.[Date] AS [Date],
       AVG(SR.SalesRank) AS Avg_rank,
       'Elite' AS Book_Group
FROM DemandRanks AS DR
INNER JOIN #SalesRank AS SR
    ON SR.ISBN13 = DR.Isbn13
   AND SR.importdate = DR.[Date]
WHERE DR.Ranking <= 200
GROUP BY DR.[Date];
GO

DROP TABLE #DailyBookFile;
DROP TABLE #SalesRank;
我有 2 个 table。 Table A 有日期、ISBN(书籍)、需求(该日期的需求)。 Table B 有日期、ISBN(对于图书)和 SalesRank。
样本数据如下: DailyBookFile 每个日期都有 150k 条记录,从 2010 年开始(即 150k * 365 天 * 8 年)行。 SalesRank Table 也是如此,每个日期有大约 50 万条记录
DailyBookFile
Date Isbn13 CurrentModifiedDemandTotal
20180122 9780955153075 13
20180122 9780805863567 9
20180122 9781138779396 1
20180122 9780029001516 9
20180122 9780470614150 42
SalesRank
importdate ISBN13 SalesRank
20180122 9780029001516 69499
20180122 9780470614150 52879
20180122 9780805863567 832429
20180122 9780955153075 44528
20180122 9781138779396 926435
Required Output
Date Avg_Rank Book_Group
20180122 385154 Elite
20180121 351545 Elite
20180120 201545 Elite
我想获取每天的Top 200 CurrentModifiedDemand,取平均Rank。
由于我是 SQL 的新手,因此无法找到解决方案。
我从昨天的前 200 个 CurrentModifiedDemand 开始,并获得了去年的平均排名。
-- Average sales rank per file date (dates after 20170101) for a fixed set of
-- ISBNs: up to 200 ISBNs taken from the 20180122 file whose demand exceeds 200.
-- The TOP 200 subquery has no ORDER BY, so which qualifying rows it returns
-- is not defined.
SELECT
    book.Filedate      AS [Date],
    AVG(rnk.SalesRank) AS Avg_Rank,
    'Elite'            AS Book_Group
FROM [ODS].[wholesale].[DailyBookFile] AS book
INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS rnk
    ON  rnk.ISBN13 = book.Isbn13
    AND rnk.importdate = book.FileDate
WHERE book.Filedate > 20170101
  AND book.Isbn13 IN (
        SELECT TOP 200 picked.Isbn13
        FROM [ODS].[wholesale].[DailyBookFile] AS picked
        WHERE picked.FileDate = 20180122
          AND CAST(picked.CurrentModifiedDemandTotal AS int) > 200
      )
GROUP BY book.Filedate;
但是结果不是我想要的。所以,现在我想要每天前 200 个 CurrentModifiedDemand 的 ISBN 及其平均排名。我试过这个。
-- Starts at 20180122 and adds 1 to the integer on every pass, for as long as
-- DailyBookFile has rows for the current value. Each pass runs its own SELECT,
-- so every pass comes back as a separate result set.
DECLARE @file_date int = 20180122;

WHILE (
        SELECT DISTINCT probe.Filedate
        FROM [ODS].[wholesale].[DailyBookFile] AS probe
        WHERE probe.Filedate = @file_date
      ) IS NOT NULL
BEGIN
    -- Average sales rank on @file_date for up to 200 ISBNs whose demand on
    -- that date exceeds 500 (TOP without ORDER BY: the pick is arbitrary).
    SELECT
        book.Filedate      AS [Date],
        AVG(rnk.SalesRank) AS Avg_Rank,
        'Elite'            AS Book_Group
    FROM [ODS].[wholesale].[DailyBookFile] AS book
    INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS rnk
        ON  rnk.ISBN13 = book.Isbn13
        AND rnk.importdate = book.FileDate
    WHERE book.Filedate = @file_date
      AND book.Isbn13 IN (
            SELECT TOP 200 picked.Isbn13
            FROM [ODS].[wholesale].[DailyBookFile] AS picked
            WHERE picked.FileDate = @file_date
              AND CAST(picked.CurrentModifiedDemandTotal AS int) > 500
          )
    GROUP BY book.Filedate;

    SET @file_date += 1;
END
在此,我在每个 window 中得到一个 select 查询结果。有什么方法可以在单个 table 中得到结果?
P.S. 每天前 200 本书的列表会根据 CurrentModifiedDemand 发生变化。我想取这些书当天销售排名的平均值。
您可以在循环的每次迭代中把行插入临时 table(或 table 类型变量),等循环结束后再一次性 select 所有内容,而不是在每次迭代中立即 select:
-- Collects one row per file date into #books so the whole result comes back
-- as a single result set instead of one result set per loop iteration.
IF OBJECT_ID('tempdb..#books') IS NOT NULL
BEGIN
    DROP TABLE #books;
END;

CREATE TABLE #books (
    [Date]       INT,
    [Avg_Rank]   FLOAT,
    [Book_Group] VARCHAR(512)
);

-- Loop cursor: a calendar date stored as a yyyymmdd integer.
DECLARE @i int;
SET @i = 20180122;

BEGIN TRY
    -- Keep going while DailyBookFile has at least one row for the current date.
    -- EXISTS only tests for presence, so no scalar subquery has to be evaluated.
    WHILE EXISTS (SELECT 1
                  FROM [ODS].[wholesale].[DailyBookFile] AS DBF
                  WHERE DBF.Filedate = @i)
    BEGIN
        INSERT INTO #books (
            [Date],
            [Avg_Rank],
            [Book_Group]
        )
        SELECT DBF.Filedate AS [Date],
               AVG(AMA.SalesRank) AS Avg_Rank,
               'Elite' AS Book_Group
        FROM [ODS].[wholesale].[DailyBookFile] AS DBF
        INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS AMA
            ON DBF.Isbn13 = AMA.ISBN13
           AND DBF.FileDate = AMA.importdate
        WHERE DBF.Isbn13 IN (SELECT TOP 200 Isbn13
                             FROM [ODS].[wholesale].[DailyBookFile]
                             WHERE FileDate = @i
                               AND CAST(CurrentModifiedDemandTotal AS int) > 500
                             -- TOP without ORDER BY returns an arbitrary 200 rows;
                             -- ordering by demand makes it the 200 highest-demand ISBNs.
                             ORDER BY CAST(CurrentModifiedDemandTotal AS int) DESC)
          AND DBF.Filedate = @i
        GROUP BY DBF.Filedate;

        -- Advance by one calendar day. Plain "@i + 1" turns 20180131 into
        -- 20180132, which is not a date, so the loop would end at every month end.
        SET @i = CONVERT(int,
                         CONVERT(char(8),
                                 DATEADD(DAY, 1, CONVERT(date, CONVERT(char(8), @i), 112)),
                                 112));
    END;
END TRY
BEGIN CATCH
    IF OBJECT_ID('tempdb..#books') IS NOT NULL
    BEGIN
        DROP TABLE #books;
    END;

    -- Re-raise the original error. Without this the error is swallowed and the
    -- SELECT below fails on the dropped table with an unrelated message.
    THROW;
END CATCH;

SELECT [Date],
       [Avg_Rank],
       [Book_Group]
FROM #books
ORDER BY [Date];

DROP TABLE #books;
使用 table 类型的变量会产生更简单的代码,但是当存储大量数据时 table 类型的变量开始在性能上低于临时 tables。我不确定有多少行是截止的,但根据我的经验,我发现在 10000+ 行计数时将 table 类型的 var 更改为 temp table 会带来显着的性能提升。对于小行数,可能适用相反的情况。
这避免了昂贵的 WHILE 循环,我相信可以实现您的目标:
-- Set-based alternative to the WHILE loop, demonstrated on the sample data:
-- for every date, take the 200 ISBNs with the highest demand and average
-- their sales rank for that same date.
CREATE TABLE #DailyBookFile ([Date] date,
                             Isbn13 bigint,
                             CurrentModifiedDemandTotal tinyint);

INSERT INTO #DailyBookFile ([Date], Isbn13, CurrentModifiedDemandTotal)
VALUES ('20180122', 9780955153075, 13),
       ('20180122', 9780805863567, 9 ),
       ('20180122', 9781138779396, 1 ),
       ('20180122', 9780029001516, 9 ),
       ('20180122', 9780470614150, 42);

CREATE TABLE #SalesRank (importdate date,
                         ISBN13 bigint,
                         SalesRank int);

INSERT INTO #SalesRank (importdate, ISBN13, SalesRank)
VALUES ('20180122', 9780029001516, 69499 ),
       ('20180122', 9780470614150, 52879 ),
       ('20180122', 9780805863567, 832429),
       ('20180122', 9780955153075, 44528 ),
       ('20180122', 9781138779396, 926435);
GO

-- Number each day's books by demand, highest first. ROW_NUMBER (rather than
-- RANK) keeps at most 200 rows per day even when demand values tie; Isbn13 is
-- only a tie-breaker so the pick is repeatable.
WITH DemandRanks AS (
    SELECT DBF.[Date],
           DBF.Isbn13,
           ROW_NUMBER() OVER (PARTITION BY DBF.[Date]
                              ORDER BY DBF.CurrentModifiedDemandTotal DESC,
                                       DBF.Isbn13) AS Ranking
    FROM #DailyBookFile AS DBF
)
SELECT DR.[Date] AS [Date],
       AVG(SR.SalesRank) AS Avg_rank,
       'Elite' AS Book_Group
FROM DemandRanks AS DR
INNER JOIN #SalesRank AS SR
    ON SR.ISBN13 = DR.Isbn13
   AND SR.importdate = DR.[Date]
WHERE DR.Ranking <= 200
GROUP BY DR.[Date];
GO

DROP TABLE #DailyBookFile;
DROP TABLE #SalesRank;