SQL:基于 CATEGORY/PERCENTAGE 的 SELECT TOP <dynamic>

SQL SELECT TOP <dynamic> BASED ON CATEGORY/PERCENTAGE

这是我的示例数据:

在这 11 行中,我需要从 CAT-1 中选取前 60% 的行,从 CAT-2 中选取前 30% 的行,从 CAT-3 中选取前 10% 的行。有人可以帮我写出这样的 SQL 吗?目标数据库是 SQL Server 2014。

我还没有测试查询,但你应该可以使用 UNION ALL

  -- Take a fixed percentage of each category's rows and stack the three slices.
  -- NOTE: no ORDER BY is specified, so which rows make up each slice is arbitrary.
  SELECT TOP (60) PERCENT t.*
  FROM Table1 AS t
  WHERE t.Category = 'CAT-1'
UNION ALL
  SELECT TOP (30) PERCENT t.*
  FROM Table1 AS t
  WHERE t.Category = 'CAT-2'
UNION ALL
  SELECT TOP (10) PERCENT t.*
  FROM Table1 AS t
  WHERE t.Category = 'CAT-3'

显然,您必须定义某种 ORDER BY 排序条件,否则返回的"前 60%"将是任意的行。注意:在 T-SQL 中,ORDER BY 不能直接写在 UNION ALL 的各个分支里,需要把每个带 TOP … ORDER BY 的 SELECT 包装成派生表(子查询)。

这是一种动态 SQL 的方法。首先,我为客户和类别创建单独的表。然后生成一个SQL命令。看看:

-- Demo: return the first N percent of the customers in each category, where N
-- is stored per category in #Cat. One TOP (...) PERCENT query is generated per
-- category and the pieces are glued together with UNION ALL.

-- One row per category together with the percentage of its rows to return.
CREATE TABLE #Cat(CatID INT IDENTITY PRIMARY KEY,Category VARCHAR(100),Percentage INT);
INSERT INTO #Cat(Category,Percentage) VALUES('CAT-1',60),('CAT-2',30),('CAT-3',10);

-- Customers, each assigned to one category.
-- NOTE: SQL Server does not enforce FOREIGN KEY constraints on temporary
-- tables; the clause below only documents the intended relationship.
CREATE TABLE #Cust(CustID INT IDENTITY PRIMARY KEY
                  ,Customer VARCHAR(100)
                  ,CatID INT FOREIGN KEY REFERENCES #Cat(CatID));
INSERT INTO #Cust(Customer,CatID) VALUES
 ('A',1),('B',1),('C',1),('D',1),('E',1),('F',2),('G',2),('H',2),('I',3),('J',3),('K',1);

-- Build one branch per category. Each branch is wrapped in a derived table
-- because ORDER BY is not allowed directly inside a UNION ALL branch, and
-- without ORDER BY the rows picked by TOP (...) PERCENT would be arbitrary.
-- Only INT values are concatenated into the statement text.
DECLARE @cmd VARCHAR(MAX)=
(
    SELECT STUFF
    (
        (
            SELECT 'UNION ALL SELECT * FROM (SELECT TOP(' + CAST(c.Percentage AS VARCHAR(10))
                 + ') PERCENT * FROM #Cust WHERE CatID=' + CAST(c.CatID AS VARCHAR(10))
                 + ' ORDER BY CustID) AS cat' + CAST(c.CatID AS VARCHAR(10)) + ' '
            FROM #Cat AS c
            ORDER BY c.CatID          -- keep the generated text stable between runs
            FOR XML PATH('')
        ),1,10,''                     -- strip the leading 'UNION ALL ' (10 characters)
    )
);

-- Join the combined rows back to #Cat to show the category name and percentage,
-- and sort the final output so it is reproducible.
SET @cmd='SELECT tbl.CustID,tbl.Customer,tbl.CatID,c.Category,c.Percentage FROM(' +  @cmd + ') AS tbl INNER JOIN #Cat AS c ON c.CatID=tbl.CatID ORDER BY tbl.CustID';

--This is the generated query
SELECT @cmd;

--And this is its execution
EXEC (@cmd);

DROP TABLE #Cust;
DROP TABLE #Cat;

结果:

1   A   1   CAT-1   60
2   B   1   CAT-1   60
3   C   1   CAT-1   60
4   D   1   CAT-1   60
6   F   2   CAT-2   30
9   I   3   CAT-3   10

虽然你可以这样写:

-- TOP accepts a variable, so the percentage can be supplied at run time.
-- The variable is referenced with the same casing as its declaration so the
-- batch also runs on servers that use a case-sensitive collation.
DECLARE @N INT = 20;
SELECT TOP (@N) PERCENT * FROM BLAH;

但我想不出如何为数据中的每个分组分别设置 @N(也许有人能用 CROSS APPLY 实现?)。

这是一个使用两个 CTE 的解决方案。它可能远非最佳:)

测试数据

-- Sample rows for the demo: 11 customers, each tagged with its category and
-- the percentage of that category's rows that should be returned.
SELECT
    src.RowID,
    src.Customer,
    src.Category,
    src.Percentage
INTO #Test
FROM (
    VALUES
        (1,  'A', 'CAT-1', 60),
        (2,  'B', 'CAT-1', 60),
        (3,  'C', 'CAT-1', 60),
        (4,  'D', 'CAT-1', 60),
        (5,  'E', 'CAT-1', 60),
        (6,  'F', 'CAT-2', 30),
        (7,  'G', 'CAT-2', 30),
        (8,  'H', 'CAT-2', 30),
        (9,  'I', 'CAT-3', 10),
        (10, 'J', 'CAT-3', 10),
        (11, 'K', 'CAT-1', 60)
) AS src (RowID, Customer, Category, Percentage);

解决方案

在这里,我在第一个 CTE 中对每个分组内的行进行排名和计数,然后在第二个 CTE 中为每一行计算"百分比区间"(percentage bracket range)。这样做是为了处理类似"只有两行的分组取前 10%"的情况:此时两行的区间上限分别是 50% 和 100%,10% 落在第一行的区间内,因此仍会返回第一行。

-- Returns, for every category, the first Percentage percent of its rows
-- (ordered by RowID), rounding a fractional row count up.
;WITH Ranked AS (
    -- Position of each row inside its category, scaled by 100 so that the
    -- integer divisions below yield whole percentages, plus the category size.
    -- ROW_NUMBER() keeps positions unique and gap-free.
    SELECT
        T.RowID,
        T.Customer,
        T.Category,
        T.Percentage,
        ROW_NUMBER() OVER (PARTITION BY T.Category ORDER BY T.RowID) * 100 AS ScaledPosition,
        COUNT(*) OVER (PARTITION BY T.Category) AS CategoryCount
    FROM #Test AS T
),
Grouped AS (
    -- Percentage bracket covered by each row: with two rows in a category the
    -- brackets are 0-50 and 50-100. The first row of a category has no
    -- predecessor, so its bracket starts at 0.
    SELECT
        R.RowID,
        R.Customer,
        R.Category,
        R.Percentage,
        COALESCE(LAG(R.ScaledPosition) OVER (PARTITION BY R.Category ORDER BY R.ScaledPosition) / R.CategoryCount, 0) AS BracketStart,
        R.ScaledPosition / R.CategoryCount AS BracketEnd  -- upper bound, kept for inspection
    FROM Ranked AS R
)
SELECT
    G.RowID
    ,G.Customer
    ,G.Category
FROM Grouped AS G
-- A row qualifies when its bracket starts strictly below the requested
-- percentage. The comparison must be strict: with "<=" a percentage that lands
-- exactly on a bracket start (e.g. 50 percent of two rows) would also return
-- the following row.
WHERE G.BracketStart < G.Percentage
ORDER BY G.Category, G.RowID;

RowID       Customer Category
----------- -------- --------
1           A        CAT-1
2           B        CAT-1
3           C        CAT-1
4           D        CAT-1
6           F        CAT-2
9           I        CAT-3

我把它作为一个新答案发布,因为它与我的第一个答案完全不同。是用户 "Les H" 启发我想到了这个方案:

--Credits to @Les H
-- Sample rows: 11 customers with their category and the percentage of that
-- category's rows to return.
SELECT
    src.RowID,
    src.Customer,
    src.Category,
    src.Percentage
INTO #Test
FROM (
    VALUES
        (1,  'A', 'CAT-1', 60),
        (2,  'B', 'CAT-1', 60),
        (3,  'C', 'CAT-1', 60),
        (4,  'D', 'CAT-1', 60),
        (5,  'E', 'CAT-1', 60),
        (6,  'F', 'CAT-2', 30),
        (7,  'G', 'CAT-2', 30),
        (8,  'H', 'CAT-2', 30),
        (9,  'I', 'CAT-3', 10),
        (10, 'J', 'CAT-3', 10),
        (11, 'K', 'CAT-1', 60)
) AS src (RowID, Customer, Category, Percentage);


-- For every distinct (Category, Percentage) pair, apply a correlated
-- TOP (...) PERCENT query that returns the first rows of that category by RowID.
SELECT
    picked.RowID,
    picked.Customer,
    picked.Category,
    picked.Percentage
FROM (
    SELECT DISTINCT
        t.Category,
        t.Percentage
    FROM #Test AS t
) AS c
CROSS APPLY (
    SELECT TOP (c.Percentage) PERCENT
        t.RowID,
        t.Customer,
        t.Category,
        t.Percentage
    FROM #Test AS t
    WHERE t.Category = c.Category
    ORDER BY t.RowID
) AS picked;

DROP TABLE #Test;

结果:

1   A   CAT-1   60
2   B   CAT-1   60
3   C   CAT-1   60
4   D   CAT-1   60
6   F   CAT-2   30
9   I   CAT-3   10