SQL:基于 CATEGORY/PERCENTAGE 动态地 SELECT TOP <dynamic>
SQL SELECT TOP <dynamic> BASED ON CATEGORY/PERCENTAGE
这是我的示例数据:
在这 11 行数据中,我需要 SELECT 出 CAT-1 的前 60% 的行、CAT-2 的前 30% 的行以及 CAT-3 的前 10% 的行。有人可以帮我构建这样的 SQL 吗?目标数据库是 SQL Server 2014。
我还没有测试查询,但你应该可以使用 UNION ALL
-- One TOP ... PERCENT slice per category, stacked with UNION ALL.
-- No ORDER BY is supplied, so the engine decides which rows fill each slice.
SELECT TOP (60) PERCENT t.*
FROM Table1 AS t
WHERE t.Category = 'CAT-1'

UNION ALL

SELECT TOP (30) PERCENT t.*
FROM Table1 AS t
WHERE t.Category = 'CAT-2'

UNION ALL

SELECT TOP (10) PERCENT t.*
FROM Table1 AS t
WHERE t.Category = 'CAT-3'
显然,您必须指定某种 ORDER BY 排序条件,否则返回的前 60% 将是任意的行。
这是一种动态 SQL 的方法。首先,我为客户和类别创建单独的表。然后生成一个SQL命令。看看:
-- Dynamic-SQL approach: build one "TOP (n) PERCENT" branch per category from
-- the #Cat lookup table, glue the branches together with UNION ALL and run it.

-- Lookup table: one row per category with the percentage of its customers to return.
CREATE TABLE #Cat
(
    CatID      INT IDENTITY PRIMARY KEY,
    Category   VARCHAR(100),
    Percentage INT
);
INSERT INTO #Cat (Category, Percentage)
VALUES ('CAT-1', 60), ('CAT-2', 30), ('CAT-3', 10);

-- Customers, each pointing at a category through CatID.
-- No FOREIGN KEY is declared: SQL Server does not enforce foreign keys on
-- temporary tables and only emits a "skipping constraint" message for them.
CREATE TABLE #Cust
(
    CustID   INT IDENTITY PRIMARY KEY,
    Customer VARCHAR(100),
    CatID    INT
);
INSERT INTO #Cust (Customer, CatID)
VALUES ('A', 1), ('B', 1), ('C', 1), ('D', 1), ('E', 1),
       ('F', 2), ('G', 2), ('H', 2),
       ('I', 3), ('J', 3),
       ('K', 1);

-- One branch per category. Each branch is wrapped in a derived table because
-- ORDER BY is not allowed directly inside a UNION ALL operand, and TOP without
-- ORDER BY would pick arbitrary rows.
-- TYPE + .value() returns the concatenated text without XML entity escaping.
-- STUFF removes the leading 'UNION ALL ' (10 characters) of the first branch.
DECLARE @cmd VARCHAR(MAX) =
(
    SELECT STUFF
    (
        (
            SELECT 'UNION ALL SELECT s.* FROM (SELECT TOP (' + CAST(c.Percentage AS VARCHAR(10))
                 + ') PERCENT * FROM #Cust WHERE CatID = ' + CAST(c.CatID AS VARCHAR(10))
                 + ' ORDER BY CustID) AS s '
            FROM #Cat AS c
            ORDER BY c.CatID
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'), 1, 10, ''
    )
);

-- Join the selected customers back to #Cat for the category name and percentage;
-- the outer ORDER BY makes the output order deterministic.
SET @cmd = 'SELECT tbl.CustID, tbl.Customer, tbl.CatID, c.Category, c.Percentage FROM ('
         + @cmd
         + ') AS tbl INNER JOIN #Cat AS c ON c.CatID = tbl.CatID ORDER BY tbl.CatID, tbl.CustID';

--This is the generated query
SELECT @cmd;

--And this is its execution
EXEC (@cmd);

DROP TABLE #Cust;
DROP TABLE #Cat;
结果:
1 A 1 CAT-1 60
2 B 1 CAT-1 60
3 C 1 CAT-1 60
4 D 1 CAT-1 60
6 F 2 CAT-2 30
9 I 3 CAT-3 10
虽然你可以做到
-- TOP accepts a variable for the percentage when it is wrapped in parentheses.
-- The variable is referenced with the same casing it was declared with, so the
-- snippet also runs on servers that use a case-sensitive collation.
DECLARE @N INT = 20;
SELECT TOP (@N) PERCENT * FROM BLAH;
但我想不出如何为数据中的每个组分别设置 @N(有人能用 CROSS APPLY 做到吗?)。
这是一个使用两个 CTE 的解决方案。它可能远非最佳:)
测试数据
-- Test fixture: 11 customers in three categories; every row carries the
-- percentage configured for its category.
SELECT
    src.RowID,
    src.Customer,
    src.Category,
    src.Percentage
INTO #Test
FROM (
    VALUES
        (1, 'A', 'CAT-1', 60), (2, 'B', 'CAT-1', 60), (3, 'C', 'CAT-1', 60),
        (4, 'D', 'CAT-1', 60), (5, 'E', 'CAT-1', 60),
        (6, 'F', 'CAT-2', 30), (7, 'G', 'CAT-2', 30), (8, 'H', 'CAT-2', 30),
        (9, 'I', 'CAT-3', 10), (10, 'J', 'CAT-3', 10),
        (11, 'K', 'CAT-1', 60)
) AS src (RowID, Customer, Category, Percentage);
解决方案
在这里,我在第一个 CTE 中对每个组内的行进行排名和计数,然后在第二个 CTE 中为每一行计算“百分比区间”(percentage bracket range)。这样做是为了处理例如“从只有两行的组中取前 10%”的情况:此时两行的区间上限分别是 50% 和 100%。
-- Emulates "TOP (Percentage) PERCENT ... ORDER BY RowID" per category using
-- window functions over #Test.
;WITH Ranked AS (
    -- Position of each row inside its category, scaled by 100 so that integer
    -- division by the category size yields whole percents.
    -- ROW_NUMBER (not RANK) keeps positions unique even if RowID values tie.
    SELECT
        T.RowID,
        T.Customer,
        T.Category,
        T.Percentage,
        ROW_NUMBER() OVER (PARTITION BY T.Category ORDER BY T.RowID) * 100 AS ScaledRank,
        COUNT(*) OVER (PARTITION BY T.Category) AS CategorySize
    FROM #Test AS T
),
Grouped AS (
    -- Percentage bracket covered by each row: it starts where the previous row
    -- ended (0 for the first row) and ends at this row's cumulative share.
    SELECT
        R.RowID,
        R.Customer,
        R.Category,
        R.Percentage,
        (R.ScaledRank - 100) / R.CategorySize AS BracketStart,
        R.ScaledRank / R.CategorySize         AS BracketEnd
    FROM Ranked AS R
)
SELECT
    G.RowID,
    G.Customer,
    G.Category
FROM Grouped AS G
-- A row is returned when its bracket starts strictly below the requested
-- percentage. The strict comparison matters: with an inclusive test, 50% of a
-- two-row category would return both rows and 0% would still return one row.
-- Like TOP ... PERCENT, a partially covered row is still included (rounds up).
WHERE G.BracketStart < G.Percentage
ORDER BY G.Category, G.RowID;
RowID Customer Category
----------- -------- --------
1 A CAT-1
2 B CAT-1
3 C CAT-1
4 D CAT-1
6 F CAT-2
9 I CAT-3
我将此添加为新答案,因为我的第一个答案完全不同。用户 "Les H" 给我带来了这个:
--Credits to @Les H
-- #Test may be left over from an earlier snippet run in the same session;
-- drop it first so SELECT ... INTO does not fail with "object already exists".
IF OBJECT_ID('tempdb..#Test') IS NOT NULL
    DROP TABLE #Test;

-- Test fixture: every row carries the percentage configured for its category.
SELECT
    A.RowID,
    A.Customer,
    A.Category,
    A.Percentage
INTO #Test
FROM (VALUES
    (1, 'A', 'CAT-1', 60),
    (2, 'B', 'CAT-1', 60),
    (3, 'C', 'CAT-1', 60),
    (4, 'D', 'CAT-1', 60),
    (5, 'E', 'CAT-1', 60),
    (6, 'F', 'CAT-2', 30),
    (7, 'G', 'CAT-2', 30),
    (8, 'H', 'CAT-2', 30),
    (9, 'I', 'CAT-3', 10),
    (10, 'J', 'CAT-3', 10),
    (11, 'K', 'CAT-1', 60)
) A (RowID, Customer, Category, Percentage);

-- For each distinct (Category, Percentage) pair, CROSS APPLY runs a correlated
-- TOP (...) PERCENT over that category's rows, ordered by RowID.
SELECT
    Percentages.RowID,
    Percentages.Customer,
    Percentages.Category,
    Percentages.Percentage
FROM (SELECT DISTINCT Category, Percentage FROM #Test) AS c
CROSS APPLY (
    SELECT TOP (c.Percentage) PERCENT
        t.RowID,
        t.Customer,
        t.Category,
        t.Percentage
    FROM #Test AS t
    WHERE t.Category = c.Category
    ORDER BY t.RowID
) AS Percentages
-- The ORDER BY inside the APPLY only decides which rows are kept; this one
-- fixes the order of the final result.
ORDER BY Percentages.Category, Percentages.RowID;

DROP TABLE #Test;
结果:
1 A CAT-1 60
2 B CAT-1 60
3 C CAT-1 60
4 D CAT-1 60
6 F CAT-2 30
9 I CAT-3 10
这是我的示例数据:
在这 11 行数据中,我需要 SELECT 出 CAT-1 的前 60% 的行、CAT-2 的前 30% 的行以及 CAT-3 的前 10% 的行。有人可以帮我构建这样的 SQL 吗?目标数据库是 SQL Server 2014。
我还没有测试查询,但你应该可以使用 UNION ALL
-- One TOP ... PERCENT slice per category, stacked with UNION ALL.
-- No ORDER BY is supplied, so the engine decides which rows fill each slice.
SELECT TOP (60) PERCENT t.*
FROM Table1 AS t
WHERE t.Category = 'CAT-1'

UNION ALL

SELECT TOP (30) PERCENT t.*
FROM Table1 AS t
WHERE t.Category = 'CAT-2'

UNION ALL

SELECT TOP (10) PERCENT t.*
FROM Table1 AS t
WHERE t.Category = 'CAT-3'
显然,您必须指定某种 ORDER BY 排序条件,否则返回的前 60% 将是任意的行。
这是一种动态 SQL 的方法。首先,我为客户和类别创建单独的表。然后生成一个SQL命令。看看:
-- Dynamic-SQL approach: build one "TOP (n) PERCENT" branch per category from
-- the #Cat lookup table, glue the branches together with UNION ALL and run it.

-- Lookup table: one row per category with the percentage of its customers to return.
CREATE TABLE #Cat
(
    CatID      INT IDENTITY PRIMARY KEY,
    Category   VARCHAR(100),
    Percentage INT
);
INSERT INTO #Cat (Category, Percentage)
VALUES ('CAT-1', 60), ('CAT-2', 30), ('CAT-3', 10);

-- Customers, each pointing at a category through CatID.
-- No FOREIGN KEY is declared: SQL Server does not enforce foreign keys on
-- temporary tables and only emits a "skipping constraint" message for them.
CREATE TABLE #Cust
(
    CustID   INT IDENTITY PRIMARY KEY,
    Customer VARCHAR(100),
    CatID    INT
);
INSERT INTO #Cust (Customer, CatID)
VALUES ('A', 1), ('B', 1), ('C', 1), ('D', 1), ('E', 1),
       ('F', 2), ('G', 2), ('H', 2),
       ('I', 3), ('J', 3),
       ('K', 1);

-- One branch per category. Each branch is wrapped in a derived table because
-- ORDER BY is not allowed directly inside a UNION ALL operand, and TOP without
-- ORDER BY would pick arbitrary rows.
-- TYPE + .value() returns the concatenated text without XML entity escaping.
-- STUFF removes the leading 'UNION ALL ' (10 characters) of the first branch.
DECLARE @cmd VARCHAR(MAX) =
(
    SELECT STUFF
    (
        (
            SELECT 'UNION ALL SELECT s.* FROM (SELECT TOP (' + CAST(c.Percentage AS VARCHAR(10))
                 + ') PERCENT * FROM #Cust WHERE CatID = ' + CAST(c.CatID AS VARCHAR(10))
                 + ' ORDER BY CustID) AS s '
            FROM #Cat AS c
            ORDER BY c.CatID
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'), 1, 10, ''
    )
);

-- Join the selected customers back to #Cat for the category name and percentage;
-- the outer ORDER BY makes the output order deterministic.
SET @cmd = 'SELECT tbl.CustID, tbl.Customer, tbl.CatID, c.Category, c.Percentage FROM ('
         + @cmd
         + ') AS tbl INNER JOIN #Cat AS c ON c.CatID = tbl.CatID ORDER BY tbl.CatID, tbl.CustID';

--This is the generated query
SELECT @cmd;

--And this is its execution
EXEC (@cmd);

DROP TABLE #Cust;
DROP TABLE #Cat;
结果:
1 A 1 CAT-1 60
2 B 1 CAT-1 60
3 C 1 CAT-1 60
4 D 1 CAT-1 60
6 F 2 CAT-2 30
9 I 3 CAT-3 10
虽然你可以做到
-- TOP accepts a variable for the percentage when it is wrapped in parentheses.
-- The variable is referenced with the same casing it was declared with, so the
-- snippet also runs on servers that use a case-sensitive collation.
DECLARE @N INT = 20;
SELECT TOP (@N) PERCENT * FROM BLAH;
但我想不出如何为数据中的每个组分别设置 @N(有人能用 CROSS APPLY 做到吗?)。
这是一个使用两个 CTE 的解决方案。它可能远非最佳:)
测试数据
-- Test fixture: 11 customers in three categories; every row carries the
-- percentage configured for its category.
SELECT
    src.RowID,
    src.Customer,
    src.Category,
    src.Percentage
INTO #Test
FROM (
    VALUES
        (1, 'A', 'CAT-1', 60), (2, 'B', 'CAT-1', 60), (3, 'C', 'CAT-1', 60),
        (4, 'D', 'CAT-1', 60), (5, 'E', 'CAT-1', 60),
        (6, 'F', 'CAT-2', 30), (7, 'G', 'CAT-2', 30), (8, 'H', 'CAT-2', 30),
        (9, 'I', 'CAT-3', 10), (10, 'J', 'CAT-3', 10),
        (11, 'K', 'CAT-1', 60)
) AS src (RowID, Customer, Category, Percentage);
解决方案
在这里,我在第一个 CTE 中对每个组内的行进行排名和计数,然后在第二个 CTE 中为每一行计算“百分比区间”(percentage bracket range)。这样做是为了处理例如“从只有两行的组中取前 10%”的情况:此时两行的区间上限分别是 50% 和 100%。
-- Emulates "TOP (Percentage) PERCENT ... ORDER BY RowID" per category using
-- window functions over #Test.
;WITH Ranked AS (
    -- Position of each row inside its category, scaled by 100 so that integer
    -- division by the category size yields whole percents.
    -- ROW_NUMBER (not RANK) keeps positions unique even if RowID values tie.
    SELECT
        T.RowID,
        T.Customer,
        T.Category,
        T.Percentage,
        ROW_NUMBER() OVER (PARTITION BY T.Category ORDER BY T.RowID) * 100 AS ScaledRank,
        COUNT(*) OVER (PARTITION BY T.Category) AS CategorySize
    FROM #Test AS T
),
Grouped AS (
    -- Percentage bracket covered by each row: it starts where the previous row
    -- ended (0 for the first row) and ends at this row's cumulative share.
    SELECT
        R.RowID,
        R.Customer,
        R.Category,
        R.Percentage,
        (R.ScaledRank - 100) / R.CategorySize AS BracketStart,
        R.ScaledRank / R.CategorySize         AS BracketEnd
    FROM Ranked AS R
)
SELECT
    G.RowID,
    G.Customer,
    G.Category
FROM Grouped AS G
-- A row is returned when its bracket starts strictly below the requested
-- percentage. The strict comparison matters: with an inclusive test, 50% of a
-- two-row category would return both rows and 0% would still return one row.
-- Like TOP ... PERCENT, a partially covered row is still included (rounds up).
WHERE G.BracketStart < G.Percentage
ORDER BY G.Category, G.RowID;
RowID Customer Category
----------- -------- --------
1 A CAT-1
2 B CAT-1
3 C CAT-1
4 D CAT-1
6 F CAT-2
9 I CAT-3
我将此添加为新答案,因为我的第一个答案完全不同。用户 "Les H" 给我带来了这个:
--Credits to @Les H
-- #Test may be left over from an earlier snippet run in the same session;
-- drop it first so SELECT ... INTO does not fail with "object already exists".
IF OBJECT_ID('tempdb..#Test') IS NOT NULL
    DROP TABLE #Test;

-- Test fixture: every row carries the percentage configured for its category.
SELECT
    A.RowID,
    A.Customer,
    A.Category,
    A.Percentage
INTO #Test
FROM (VALUES
    (1, 'A', 'CAT-1', 60),
    (2, 'B', 'CAT-1', 60),
    (3, 'C', 'CAT-1', 60),
    (4, 'D', 'CAT-1', 60),
    (5, 'E', 'CAT-1', 60),
    (6, 'F', 'CAT-2', 30),
    (7, 'G', 'CAT-2', 30),
    (8, 'H', 'CAT-2', 30),
    (9, 'I', 'CAT-3', 10),
    (10, 'J', 'CAT-3', 10),
    (11, 'K', 'CAT-1', 60)
) A (RowID, Customer, Category, Percentage);

-- For each distinct (Category, Percentage) pair, CROSS APPLY runs a correlated
-- TOP (...) PERCENT over that category's rows, ordered by RowID.
SELECT
    Percentages.RowID,
    Percentages.Customer,
    Percentages.Category,
    Percentages.Percentage
FROM (SELECT DISTINCT Category, Percentage FROM #Test) AS c
CROSS APPLY (
    SELECT TOP (c.Percentage) PERCENT
        t.RowID,
        t.Customer,
        t.Category,
        t.Percentage
    FROM #Test AS t
    WHERE t.Category = c.Category
    ORDER BY t.RowID
) AS Percentages
-- The ORDER BY inside the APPLY only decides which rows are kept; this one
-- fixes the order of the final result.
ORDER BY Percentages.Category, Percentages.RowID;

DROP TABLE #Test;
结果:
1 A CAT-1 60
2 B CAT-1 60
3 C CAT-1 60
4 D CAT-1 60
6 F CAT-2 30
9 I CAT-3 10