当累积列小于或等于目标时设置组号
Set a Group number when cumulative column less than or equal to target
我有一列,记录每个网站有多少个 sub_links。
我的表结构是 id、sub_link、link。
id sub_link link group
1 5 link1.com 1
2 2 link2.com 2
3 4 link3.com 3
4 1 link4.com 3
4 1 link4.com 3
5 2 link5.com 4
6 4 link6.com 5
7 3 link7.com 6
7 3 link7.com 6
我想在 sub_link 的总数等于或小于 5 时添加一个带有组号的列,同时避免重复。
我不知道从哪里开始,因为我只能编写 select 语句并进行一些连接。
也许有一种使用 Window 函数或 CTE 的方法,我更喜欢它来进一步提升我的经验。
这是一个 fiddle 链接:
https://www.db-fiddle.com/f/6rmtcazWaWvLULZ5QgpmSb/1
感谢您的帮助。
根据您在下面的评论,除了在单独的临时表中跟踪 Sub_link 的滚动总和之外,我不确定还能如何实现。下面是一个例子。如果您只需要跟踪当前记录和上一条记录的 Sub_link,或许可以用一个使用 LAG 函数的单个查询来完成。
-- Demo setup: create a scratch database and the sample [Test] table, then seed
-- it with the rows from the question. IDs 4 and 7 are deliberately inserted
-- twice so the duplicate handling further down has something to work on.
USE [master]
GO
CREATE DATABASE [Test]
GO
USE [Test]
GO
CREATE TABLE [Test] (
    ID INT
    , Sub_Links TINYINT
    , Link VARCHAR(100)
);
-- Explicit column list: the seed no longer depends on the physical column
-- order of [Test] and keeps working if columns are added later.
INSERT INTO [Test] (ID, Sub_Links, Link)
VALUES (1, 5, 'link1.com')
    , (2, 2, 'link2.com')
    , (3, 4, 'link3.com')
    , (4, 1, 'link4.com')
    , (4, 1, 'link4.com')
    , (5, 2, 'link5.com')
    , (6, 4, 'link6.com')
    , (7, 3, 'link7.com')
    , (7, 3, 'link7.com');
SET NOCOUNT ON
GO
-- A run that was cancelled or failed part-way leaves #Staging behind in the
-- session; drop it first so the script can be re-run without a CREATE error.
IF OBJECT_ID('tempdb..#Staging') IS NOT NULL
    DROP TABLE #Staging
GO
-- Working copy of the distinct Test rows plus the two columns that the
-- grouping step fills in afterwards (both stay NULL until then).
CREATE TABLE #Staging
(
    ID INT
    , Link VARCHAR(100)
    , Sub_Links INT
    , GroupNum INT
    , SublinkRollingSum INT -- same type as Sub_Links so the running total cannot overflow a narrower type
)
GO
-- Clustered on (ID, Link): the key the grouping step orders by and looks rows
-- up with. Created before the load so rows go straight into the index.
CREATE CLUSTERED INDEX [StagingOrder] ON #Staging(ID, Link)
GO
-- No ORDER BY on the load: insert order is not something later reads can rely
-- on, and every consumer of #Staging orders explicitly.
INSERT INTO #Staging(ID, Link, Sub_Links)
SELECT DISTINCT --Don't include duplicate records
    ID
    , Link
    , Sub_Links
FROM Test
GO
--CREATE INDEX [GroupIndex] ON #Staging(GroupNum, SublinkRollingSum)--Intended to improve performance of below while loop
--GO
-- Walk #Staging in (ID, Link) order, one row per iteration, carrying a running
-- Sub_Links total. A row stays in the current group while the total remains
-- within @MaxGroupSum; otherwise it opens the next group and the total
-- restarts at that row's own Sub_Links. The first row always opens group 1.
DECLARE
    @MaxGroupSum INT = 5            -- cap on the summed Sub_Links within one group
    , @CurrentID INT
    , @CurrentLink VARCHAR(100)
    , @CurrentSubLinks INT
    , @CurrentGroup INT = 0         -- 0 = no group opened yet
    , @SublinkRollingSum INT = 0    -- INT so adding an INT Sub_Links cannot overflow
    , @RowFound INT;

-- Fetch the first row in (ID, Link) order.
SELECT TOP(1)
    @CurrentID = ID
    , @CurrentLink = Link
    , @CurrentSubLinks = Sub_Links
FROM #Staging
ORDER BY ID, Link;
-- Capture @@ROWCOUNT immediately: it reflects only the most recent statement,
-- so reading it later would break as soon as another statement is added.
SET @RowFound = @@ROWCOUNT;

WHILE (@RowFound > 0)
BEGIN
    -- Decide the group in separate statements. Assigning both variables in one
    -- SELECT, where one expression reads the other, relies on an evaluation
    -- order that SQL Server does not guarantee.
    IF (@CurrentGroup > 0 AND @SublinkRollingSum + @CurrentSubLinks <= @MaxGroupSum)
    BEGIN
        SET @SublinkRollingSum = @SublinkRollingSum + @CurrentSubLinks;
    END
    ELSE
    BEGIN
        SET @CurrentGroup = @CurrentGroup + 1;
        SET @SublinkRollingSum = @CurrentSubLinks;
    END

    -- Persist the values computed for the current row.
    UPDATE #Staging
    SET SublinkRollingSum = @SublinkRollingSum
        , GroupNum = @CurrentGroup
    WHERE ID = @CurrentID
        AND Link = @CurrentLink;

    -- Advance to the row strictly after the current (ID, Link) key. The Link
    -- comparison must be ">" rather than "<>": with "<>", two links sharing an
    -- ID keep selecting each other and the loop never terminates.
    SELECT TOP(1)
        @CurrentID = ID
        , @CurrentLink = Link
        , @CurrentSubLinks = Sub_Links
    FROM #Staging
    WHERE ID > @CurrentID
        OR (ID = @CurrentID AND Link > @CurrentLink)
    ORDER BY ID, Link;
    SET @RowFound = @@ROWCOUNT;
END
-- Report every Test row (duplicates included) together with the group number
-- stored in #Staging for its (ID, Link) pair, then remove the temp table.
-- The alias is declared in the same case it is referenced with, so the query
-- also binds on a case-sensitive collation.
SELECT
    t.ID
    , t.Sub_Links
    , t.Link
    , s.GroupNum
FROM #Staging AS s
INNER JOIN Test AS t
    ON s.ID = t.ID
    AND s.Link = t.Link
ORDER BY t.ID, t.Link;
DROP TABLE #Staging
--DROP DATABASE [Test]
我有一列,记录每个网站有多少个 sub_links。
我的表结构是 id、sub_link、link。
id sub_link link group
1 5 link1.com 1
2 2 link2.com 2
3 4 link3.com 3
4 1 link4.com 3
4 1 link4.com 3
5 2 link5.com 4
6 4 link6.com 5
7 3 link7.com 6
7 3 link7.com 6
我想在 sub_link 的总数等于或小于 5 时添加一个带有组号的列,同时避免重复。
我不知道从哪里开始,因为我只能编写 select 语句并进行一些连接。 也许有一种使用 Window 函数或 CTE 的方法,我更喜欢它来进一步提升我的经验。
这是一个 fiddle 链接:
https://www.db-fiddle.com/f/6rmtcazWaWvLULZ5QgpmSb/1
感谢您的帮助。
根据您在下面的评论,除了在单独的临时表中跟踪 Sub_link 的滚动总和之外,我不确定还能如何实现。下面是一个例子。如果您只需要跟踪当前记录和上一条记录的 Sub_link,或许可以用一个使用 LAG 函数的单个查询来完成。
USE [master]
GO
-- Demo setup: create a scratch database and the sample [Test] table, then seed
-- it with the rows from the question. IDs 4 and 7 are deliberately inserted
-- twice so the duplicate handling further down has something to work on.
CREATE DATABASE [Test]
GO
USE [Test]
GO
CREATE TABLE [Test] (
    ID INT
    , Sub_Links TINYINT
    , Link VARCHAR(100)
);
-- Explicit column list: the seed no longer depends on the physical column
-- order of [Test] and keeps working if columns are added later.
INSERT INTO [Test] (ID, Sub_Links, Link)
VALUES (1, 5, 'link1.com')
    , (2, 2, 'link2.com')
    , (3, 4, 'link3.com')
    , (4, 1, 'link4.com')
    , (4, 1, 'link4.com')
    , (5, 2, 'link5.com')
    , (6, 4, 'link6.com')
    , (7, 3, 'link7.com')
    , (7, 3, 'link7.com');
SET NOCOUNT ON
GO
-- A run that was cancelled or failed part-way leaves #Staging behind in the
-- session; drop it first so the script can be re-run without a CREATE error.
IF OBJECT_ID('tempdb..#Staging') IS NOT NULL
    DROP TABLE #Staging
GO
-- Working copy of the distinct Test rows plus the two columns that the
-- grouping step fills in afterwards (both stay NULL until then).
CREATE TABLE #Staging
(
    ID INT
    , Link VARCHAR(100)
    , Sub_Links INT
    , GroupNum INT
    , SublinkRollingSum INT -- same type as Sub_Links so the running total cannot overflow a narrower type
)
GO
-- Clustered on (ID, Link): the key the grouping step orders by and looks rows
-- up with. Created before the load so rows go straight into the index.
CREATE CLUSTERED INDEX [StagingOrder] ON #Staging(ID, Link)
GO
-- No ORDER BY on the load: insert order is not something later reads can rely
-- on, and every consumer of #Staging orders explicitly.
INSERT INTO #Staging(ID, Link, Sub_Links)
SELECT DISTINCT --Don't include duplicate records
    ID
    , Link
    , Sub_Links
FROM Test
GO
--CREATE INDEX [GroupIndex] ON #Staging(GroupNum, SublinkRollingSum)--Intended to improve performance of below while loop
--GO
-- Walk #Staging in (ID, Link) order, one row per iteration, carrying a running
-- Sub_Links total. A row stays in the current group while the total remains
-- within @MaxGroupSum; otherwise it opens the next group and the total
-- restarts at that row's own Sub_Links. The first row always opens group 1.
DECLARE
    @MaxGroupSum INT = 5            -- cap on the summed Sub_Links within one group
    , @CurrentID INT
    , @CurrentLink VARCHAR(100)
    , @CurrentSubLinks INT
    , @CurrentGroup INT = 0         -- 0 = no group opened yet
    , @SublinkRollingSum INT = 0    -- INT so adding an INT Sub_Links cannot overflow
    , @RowFound INT;

-- Fetch the first row in (ID, Link) order.
SELECT TOP(1)
    @CurrentID = ID
    , @CurrentLink = Link
    , @CurrentSubLinks = Sub_Links
FROM #Staging
ORDER BY ID, Link;
-- Capture @@ROWCOUNT immediately: it reflects only the most recent statement,
-- so reading it later would break as soon as another statement is added.
SET @RowFound = @@ROWCOUNT;

WHILE (@RowFound > 0)
BEGIN
    -- Decide the group in separate statements. Assigning both variables in one
    -- SELECT, where one expression reads the other, relies on an evaluation
    -- order that SQL Server does not guarantee.
    IF (@CurrentGroup > 0 AND @SublinkRollingSum + @CurrentSubLinks <= @MaxGroupSum)
    BEGIN
        SET @SublinkRollingSum = @SublinkRollingSum + @CurrentSubLinks;
    END
    ELSE
    BEGIN
        SET @CurrentGroup = @CurrentGroup + 1;
        SET @SublinkRollingSum = @CurrentSubLinks;
    END

    -- Persist the values computed for the current row.
    UPDATE #Staging
    SET SublinkRollingSum = @SublinkRollingSum
        , GroupNum = @CurrentGroup
    WHERE ID = @CurrentID
        AND Link = @CurrentLink;

    -- Advance to the row strictly after the current (ID, Link) key. The Link
    -- comparison must be ">" rather than "<>": with "<>", two links sharing an
    -- ID keep selecting each other and the loop never terminates.
    SELECT TOP(1)
        @CurrentID = ID
        , @CurrentLink = Link
        , @CurrentSubLinks = Sub_Links
    FROM #Staging
    WHERE ID > @CurrentID
        OR (ID = @CurrentID AND Link > @CurrentLink)
    ORDER BY ID, Link;
    SET @RowFound = @@ROWCOUNT;
END
-- Report every Test row (duplicates included) together with the group number
-- stored in #Staging for its (ID, Link) pair, then remove the temp table.
-- The alias is declared in the same case it is referenced with, so the query
-- also binds on a case-sensitive collation.
SELECT
    t.ID
    , t.Sub_Links
    , t.Link
    , s.GroupNum
FROM #Staging AS s
INNER JOIN Test AS t
    ON s.ID = t.ID
    AND s.Link = t.Link
ORDER BY t.ID, t.Link;
DROP TABLE #Staging
--DROP DATABASE [Test]