SQL 将逗号分隔的字段拆分为多行,每个用户列一行
TSQL Splitting Comma Separated Field into Multiple Rows with a Row per user Column
我在 MS SQL Server 2008 中使用遗留表(legacy tables),需要创建一个视图(view),以新系统所需的方式显示数据。下面是遗留表。
Table
id userid sport1 sport1level sport2 sport2level
----------------------------------------------------------------------------------------------------------------------------------
1 11 Baseball Varsity Baseball Recreational
2 22 Baseball,Basketball Varsity,Junior Varsity Baseball Varsity
3 33 Soccer Varsity Soccer,Track & Field Recreational,Intramural
4 44 null null Tennis Varsity
5 55 Volleyball Varsity null null
6 66 Baseball,Basketball Varsity,Varsity Soccer,Football Varsity,Varsity
7 77 Baseball,Basketball,Rowing Varsity,Varsity,Varsity Soccer,Football,Volleyball Varsity,Varsity,Recreational
这是我们正在寻找的结果:
结果
id userid sport sportlevel1 sportlevel2
---------------------------------------------------------------------------------------
1_1 11 Baseball Varsity Recreational
2_1 22 Baseball Varsity Varsity
2_2 22 Basketball Junior Varsity null
3_1 33 Soccer Varsity Recreational
3_2 33 Track & Field null Intramural
4_1 44 Tennis null Varsity
5_1 55 Volleyball Varsity null
6_1 66 Baseball Varsity null
6_2 66 Basketball Varsity null
6_3 66 Soccer null Varsity
6_4 66 Football null Varsity
7_1 77 Baseball Varsity null
7_2 77 Basketball Varsity null
7_3 77 Rowing Varsity null
7_4 77 Soccer null Varsity
7_5 77 Football null Varsity
7_6 77 Volleyball null Recreational
主要注意事项:
- 原始 table 中可能包含超过 2 个逗号分隔的值(我添加了第 7 行来说明这一点)。
- 遗留 table 中的 id 是 int,但在新的 table 中不一定需要是这种类型。
- 您可能已经注意到,新 table 的 id 是 {original id}_{incremental sport count per user} 的串联。其中 {incremental sport count per user} 相当于一个 sub id,对应用户选择的每一项运动。例如:id = 2 的那一行(userid = 22)选择了 2 项不同(distinct)的运动:棒球和篮球,尽管棒球同时出现在两列中。
如果我必须创建辅助函数或其他任何东西,请告诉我。
如果您有任何疑问或需要更多信息,请告诉我。
请不要尝试询问为什么采用这种结构或尝试为新格式提供更好的结构。谢谢
不太优雅,但它完成了工作:
-- Splits the comma-separated sport / level columns of every legacy row into
-- one output row per distinct sport, carrying the level found in column
-- group 1 (sport1/sport1level) and in column group 2 (sport2/sport2level).
WITH
Data AS (
    -- Inline sample rows standing in for the legacy table.
    SELECT T.id, T.userid, T.sport1, T.sport1level, T.sport2, T.sport2level
    FROM (
        VALUES ( 1, 11, 'Baseball ', 'Varsity ', 'Baseball ', 'Recreational ' )
             , ( 2, 22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball ', 'Varsity ' )
             , ( 3, 33, 'Soccer ', 'Varsity ', 'Soccer,Track & Field', 'Recreational,Intramural' )
             , ( 4, 44, NULL, NULL, 'Tennis ', 'Varsity ' )
             , ( 5, 55, 'Volleyball ', 'Varsity ', NULL, NULL )
             , ( 6, 66, 'Baseball,Basketball', 'Varsity,Varsity ', 'Soccer,Football ', 'Varsity,Varsity ' )
             , ( 7, 77, 'Baseball,Football,Rugby,Wrestling', 'Varsity,Varsity,Varsity,Junior Varsity', 'Rugby', 'Recreational' )
    ) AS T(id, userid, sport1, sport1level, sport2, sport2level)
),
SplitValues AS (
    -- Anchor: level 0 carries the unsplit strings as "remainders". The
    -- substring logic lives only in the recursive member (to avoid repeating
    -- it), so level 0 rows must be ignored by consumers.
    SELECT
          id
        , userid
        , [level] = 0
        , sport1 = sport1
        , sport1level = sport1level
        , sport2 = sport2
        , sport2level = sport2level
        , sport1Remainder = sport1
        , sport1levelRemainder = sport1level
        , sport2Remainder = sport2
        , sport2levelRemainder = sport2level
    FROM Data
    UNION ALL
    -- Recursive member: peel the first value off each remainder.
    --  * value     = text before the first comma, or the whole remainder when
    --                CHARINDEX finds no comma (0 - 1 = -1 -> NULL -> LEN(...)).
    --  * remainder = text after the first comma, or NULL when there is no
    --                comma (0 + 1 = 1 -> NULLIF(..., 1) -> NULL start position).
    SELECT
          id
        , userid
        , [level] = [level] + 1
        , sport1 = SUBSTRING(sport1Remainder, 1, ISNULL(NULLIF(CHARINDEX(',', sport1Remainder) - 1, -1), LEN(sport1Remainder)))
        , sport1level = SUBSTRING(sport1levelRemainder, 1, ISNULL(NULLIF(CHARINDEX(',', sport1levelRemainder) - 1, -1), LEN(sport1levelRemainder)))
        , sport2 = SUBSTRING(sport2Remainder, 1, ISNULL(NULLIF(CHARINDEX(',', sport2Remainder) - 1, -1), LEN(sport2Remainder)))
        , sport2level = SUBSTRING(sport2levelRemainder, 1, ISNULL(NULLIF(CHARINDEX(',', sport2levelRemainder) - 1, -1), LEN(sport2levelRemainder)))
        , sport1Remainder = SUBSTRING(sport1Remainder, NULLIF(CHARINDEX(',', sport1Remainder) + 1, 1), LEN(sport1Remainder))
        , sport1levelRemainder = SUBSTRING(sport1levelRemainder, NULLIF(CHARINDEX(',', sport1levelRemainder) + 1, 1), LEN(sport1levelRemainder))
        , sport2Remainder = SUBSTRING(sport2Remainder, NULLIF(CHARINDEX(',', sport2Remainder) + 1, 1), LEN(sport2Remainder))
        , sport2levelRemainder = SUBSTRING(sport2levelRemainder, NULLIF(CHARINDEX(',', sport2levelRemainder) + 1, 1), LEN(sport2levelRemainder))
    FROM SplitValues
    -- Keep recursing while either sport column still has unprocessed text.
    WHERE sport1Remainder IS NOT NULL
       OR sport2Remainder IS NOT NULL
),
SplitRowsWithDifferentSport AS (
    -- part = 1: sports listed in sport1 (with the sport2 level when the same
    --           sport sits at the same position in sport2).
    SELECT id
         , userid
         , sport1
         , sport1level
         , sport1level2 = CASE WHEN sport1 = sport2 THEN sport2level END
         , part = 1
         , pos = [level]
    FROM SplitValues
    WHERE [level] <> 0
    UNION ALL
    -- part = 2: sports listed in sport2 that differ from the sport1 value at
    --           the same position; they only carry a level-2 value.
    SELECT id
         , userid
         , sport2
         , NULL
         , sport1level2 = sport2level
         , part = 2
         , pos = [level]
    FROM SplitValues
    WHERE ISNULL(sport1, '') <> sport2
      AND [level] <> 0
)
-- One row per (id, userid, sport); the GROUP BY merges a sport that appears
-- in both column groups at different positions.
-- The sub id is numbered deterministically: sports from sport1 first (in
-- their original position), then sports that only occur in sport2 (in their
-- original position). Ordering by S.id alone would be a tie for every row of
-- a user and leave the numbering up to the optimizer.
SELECT id = CAST(S.id AS VARCHAR(max)) + '_' +
            CAST(ROW_NUMBER() OVER (
                     PARTITION BY S.userid
                     ORDER BY S.id
                            , MIN(S.part)
                            , MIN(CASE WHEN S.part = 1 THEN S.pos END)
                            , MIN(S.pos)
                            , S.sport1
                 ) AS VARCHAR(max))
     , S.sport1
     , sport1level1 = MAX(S.sport1level)
     , sport1level2 = MAX(S.sport1level2)
FROM SplitRowsWithDifferentSport AS S
WHERE S.sport1 IS NOT NULL
GROUP BY S.ID, S.userid, S.sport1
ORDER BY id
编辑:修改了 SplitValues CTE,使单个列中可以包含多项运动。由于递归层数默认上限为 100,目前每行最多支持 99 项运动;如果需要更多,请添加 OPTION (MAXRECURSION 0) 以完全取消限制。
EDIT2: 添加了分组依据以摆脱多行的相同运动。
您期望的输出没有优化,因为它提供了一种在 SportLevel2 中产生空值的方法。您应该将每项运动和每个级别存储为单独的行,例如:
nid userid SportName SportLevel
1_1 11 Baseball Varsity
1_2 11 Baseball Recreational
2_1 22 Baseball Varsity
2_2 22 Baseball Varsity
3_1 33 Soccer Varsity
3_2 33 Soccer Recreational
3_3 33 Track & Field Recreation
4_2 44 Tennis Varsity
5_1 55 Volleyball Varsity
6_1 66 Baseball Varsity
6_2 66 Soccer Varsity
6_2 66 Basketball Varsity
6_3 66 Football Varsity
为此,您可以按如下方式使用 CTE:
-- Sample data: table variable mirroring the legacy table layout.
DECLARE @tmp TABLE(id INT IDENTITY(1,1), userid INT , sport1 VARCHAR(150), sport1level VARCHAR(150), sport2 VARCHAR(150), sport2level VARCHAR(150))
INSERT INTO @tmp (userid, sport1, sport1level, sport2, sport2level)
VALUES(11, 'Baseball', 'Varsity', 'Baseball', 'Recreational'),
(22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball', 'Varsity'),
(33, 'Soccer', 'Varsity', 'Soccer,Track & Field', 'Recreational,Intramural'),
(44, null, null, 'Tennis', 'Varsity'),
(55, 'Volleyball', 'Varsity', null, null),
(66, 'Baseball,Basketball', 'Varsity,Varsity', 'Soccer,Football', 'Varsity,Varsity')
-- Sports: one row per (sport, level) pair. String columns are cast to
-- VARCHAR(150) everywhere so that all anchor members and the recursive member
-- agree on the column types.
-- NOTE(review): sportid starts at 1 for sport1 and at 2 for sport2 and is
-- incremented per extra value, so nid is not guaranteed to be unique per id.
;WITH Sports AS
(
    --1) initial value
    -- a) no commas in sport1
    SELECT id, userid, 1 AS sportid,
           CAST(sport1 AS VARCHAR(150)) AS SportName,
           CAST(sport1level AS VARCHAR(150)) AS SportLevel,
           CAST(NULL AS VARCHAR(150)) AS SportNameRemainder,
           CAST(NULL AS VARCHAR(150)) AS SportLevelRemainder
    FROM @tmp
    WHERE CHARINDEX(',', sport1) = 0 AND CHARINDEX(',', sport1level) = 0
    UNION ALL
    -- b) no commas in sport2
    SELECT id, userid, 2 AS sportid,
           CAST(sport2 AS VARCHAR(150)) AS SportName,
           CAST(sport2level AS VARCHAR(150)) AS SportLevel,
           CAST(NULL AS VARCHAR(150)) AS SportNameRemainder,
           CAST(NULL AS VARCHAR(150)) AS SportLevelRemainder
    FROM @tmp
    WHERE CHARINDEX(',', sport2) = 0 AND CHARINDEX(',', sport2level) = 0
    UNION ALL
    -- c) commas in sport1: first value goes to SportName/SportLevel, the text
    --    after the first comma goes to the remainders
    SELECT id, userid, 1 AS sportid,
           CAST(LEFT(sport1, CHARINDEX(',', sport1) - 1) AS VARCHAR(150)) AS SportName,
           CAST(LEFT(sport1level, CHARINDEX(',', sport1level) - 1) AS VARCHAR(150)) AS SportLevel,
           CAST(SUBSTRING(sport1, CHARINDEX(',', sport1) + 1, 150) AS VARCHAR(150)) AS SportNameRemainder,
           -- was LEFT(...), which returned the head of the string instead of the tail
           CAST(SUBSTRING(sport1level, CHARINDEX(',', sport1level) + 1, 150) AS VARCHAR(150)) AS SportLevelRemainder
    FROM @tmp
    WHERE CHARINDEX(',', sport1) > 0 AND CHARINDEX(',', sport1level) > 0
    UNION ALL
    -- d) commas in sport2
    SELECT id, userid, 2 AS sportid,
           CAST(LEFT(sport2, CHARINDEX(',', sport2) - 1) AS VARCHAR(150)) AS SportName,
           CAST(LEFT(sport2level, CHARINDEX(',', sport2level) - 1) AS VARCHAR(150)) AS SportLevel,
           CAST(SUBSTRING(sport2, CHARINDEX(',', sport2) + 1, 150) AS VARCHAR(150)) AS SportNameRemainder,
           CAST(SUBSTRING(sport2level, CHARINDEX(',', sport2level) + 1, 150) AS VARCHAR(150)) AS SportLevelRemainder
    FROM @tmp
    WHERE CHARINDEX(',', sport2) > 0 AND CHARINDEX(',', sport2level) > 0
    UNION ALL
    --2) recursive part: peel the next value off the remainders; keeps going
    --   until the name remainder is exhausted, so more than two values per
    --   column are supported
    SELECT s.id, s.userid, s.sportid + 1 AS sportid,
           CAST(CASE WHEN CHARINDEX(',', s.SportNameRemainder) > 0
                     THEN LEFT(s.SportNameRemainder, CHARINDEX(',', s.SportNameRemainder) - 1)
                     ELSE s.SportNameRemainder
                END AS VARCHAR(150)) AS SportName,
           CAST(CASE WHEN CHARINDEX(',', s.SportLevelRemainder) > 0
                     THEN LEFT(s.SportLevelRemainder, CHARINDEX(',', s.SportLevelRemainder) - 1)
                     ELSE s.SportLevelRemainder
                END AS VARCHAR(150)) AS SportLevel,
           CAST(CASE WHEN CHARINDEX(',', s.SportNameRemainder) > 0
                     THEN SUBSTRING(s.SportNameRemainder, CHARINDEX(',', s.SportNameRemainder) + 1, 150)
                END AS VARCHAR(150)) AS SportNameRemainder,
           CAST(CASE WHEN CHARINDEX(',', s.SportLevelRemainder) > 0
                     THEN SUBSTRING(s.SportLevelRemainder, CHARINDEX(',', s.SportLevelRemainder) + 1, 150)
                END AS VARCHAR(150)) AS SportLevelRemainder
    FROM Sports AS s
    WHERE s.SportNameRemainder IS NOT NULL
)
-- '+' concatenation instead of CONCAT(): CONCAT is not available on SQL Server 2008.
SELECT CONVERT(VARCHAR(5), id) + '_' + CONVERT(VARCHAR(5), sportid) AS nid, userid, SportName, SportLevel
FROM Sports
ORDER BY id, userid, sportid
随时根据您的需要进行更改。
注意:我建议将 SportLevel 中的字符串值替换为对应的数值,并且不要把 id 与 SportLevel 拼接在一起。例如:Varsity 的值可以是 1,Recreational 的值是 2,依此类推。SportName 也应采用同样的逻辑。这样可能需要联接(JOIN)两个表中的数据。如果您需要帮助,请告诉我 ;)
试试这个:
-- Splits the four comma-separated columns with a numbers (tally) CTE, lines
-- the pieces up by their position (RN) and emits one row per sport.
WITH Data
AS ( -- Inline sample rows standing in for the legacy table.
     SELECT T.id, T.userid, T.sport1, T.sportlevel1, T.sport2, T.sportlevel2
     FROM ( VALUES
              ( 1, 11, 'Baseball', 'Varsity', 'Baseball', 'Recreational'),
              ( 2, 22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball', 'Varsity'),
              ( 3, 33, 'Soccer', 'Varsity', 'Soccer,Track & Field', 'Recreational,Intramural'),
              ( 4, 44, NULL , NULL , 'Tennis', 'Varsity'),
              ( 5, 55, 'Volleyball', 'Varsity', NULL , NULL ),
              ( 6, 66, 'Baseball,Basketball', 'Varsity,Varsity', 'Soccer,Football', 'Varsity,Varsity'),
              ( 7, 77, 'Baseball,Basketball,Rowing', 'Varsity,Varsity,Varsity', 'Soccer,Football,Volleyball', 'Varsity,Varsity,Recreational') )
          AS T ( id, userid, sport1, sportlevel1, sport2, sportlevel2 )
   ),
Tally
AS ( -- Numbers 1..101, used as candidate start positions inside the strings.
     SELECT n = 1
     UNION ALL
     SELECT n + 1
     FROM Tally
     WHERE n <= 100
   ),
-- Each of the four CTEs below splits one column. A tally number n qualifies
-- when character n of (',' + value) is a comma, i.e. n is the start of a
-- value; a NULL column still yields one row (n = 1) with a NULL value.
-- RN is the 1-based position of the value inside its column.
Sprt1
AS ( SELECT ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.n ) AS RN ,
            d.id ,
            d.userid ,
            CASE WHEN CHARINDEX(',', d.sport1) = 0 THEN d.sport1
                 ELSE SUBSTRING(d.sport1, T.n,
                                CHARINDEX(',', SUBSTRING(d.sport1 + ',', T.n, 100)) - 1)
            END AS sport1
     FROM Data AS d
     INNER JOIN Tally AS T
         ON COALESCE(LEN(d.sport1), 1) >= T.n
        AND SUBSTRING(',' + COALESCE(d.sport1, ''), T.n, 1) = ','
   ),
lvl1
AS ( SELECT ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.n ) AS RN ,
            d.id ,
            d.userid ,
            CASE WHEN CHARINDEX(',', d.sportlevel1) = 0 THEN d.sportlevel1
                 ELSE SUBSTRING(d.sportlevel1, T.n,
                                CHARINDEX(',', SUBSTRING(d.sportlevel1 + ',', T.n, 100)) - 1)
            END AS sportlevel1
     FROM Data AS d
     INNER JOIN Tally AS T
         ON COALESCE(LEN(d.sportlevel1), 1) >= T.n
        AND SUBSTRING(',' + COALESCE(d.sportlevel1, ''), T.n, 1) = ','
   ),
sprt2
AS ( SELECT ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.n ) AS RN ,
            d.id ,
            d.userid ,
            CASE WHEN CHARINDEX(',', d.sport2) = 0 THEN d.sport2
                 ELSE SUBSTRING(d.sport2, T.n,
                                CHARINDEX(',', SUBSTRING(d.sport2 + ',', T.n, 100)) - 1)
            END AS sport2
     FROM Data AS d
     INNER JOIN Tally AS T
         ON COALESCE(LEN(d.sport2), 1) >= T.n
        AND SUBSTRING(',' + COALESCE(d.sport2, ''), T.n, 1) = ','
   ),
lvl2
AS ( SELECT ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.n ) AS RN ,
            d.id ,
            d.userid ,
            CASE WHEN CHARINDEX(',', d.sportlevel2) = 0 THEN d.sportlevel2
                 ELSE SUBSTRING(d.sportlevel2, T.n,
                                CHARINDEX(',', SUBSTRING(d.sportlevel2 + ',', T.n, 100)) - 1)
            END AS sportlevel2
     FROM Data AS d
     INNER JOIN Tally AS T
         ON COALESCE(LEN(d.sportlevel2), 1) >= T.n
        AND SUBSTRING(',' + COALESCE(d.sportlevel2, ''), T.n, 1) = ','
   ),
final
AS ( -- Re-assemble the pieces side by side: same id, same position (RN).
     -- FULL JOINs keep positions that exist in only some of the columns.
     SELECT COALESCE(sprt1.RN, lvl1.RN, sprt2.RN, lvl2.RN) AS RN ,
            COALESCE(sprt1.id, lvl1.id, sprt2.id, lvl2.id) AS id ,
            COALESCE(sprt1.userid, lvl1.userid, sprt2.userid, lvl2.userid) AS userid ,
            sprt1.sport1 ,
            lvl1.sportlevel1 ,
            sprt2.sport2 ,
            lvl2.sportlevel2
     FROM sprt1
     FULL JOIN lvl1
         ON sprt1.id = lvl1.id
        AND sprt1.RN = lvl1.RN
     FULL JOIN sprt2
         ON COALESCE(sprt1.id, lvl1.id) = sprt2.id
        AND COALESCE(sprt1.RN, lvl1.RN) = sprt2.RN
     FULL JOIN lvl2
         ON COALESCE(sprt1.id, lvl1.id, sprt2.id) = lvl2.id
        AND COALESCE(sprt1.RN, lvl1.RN, sprt2.RN) = lvl2.RN
   )
-- Output: sports from sport1 (part 1) followed by sports from sport2 that
-- differ from the sport1 value at the same position (part 2).
-- The sub id follows the original position (RN) inside each part rather than
-- the alphabetical order of the sport name, so e.g. 'Soccer,Football' is
-- numbered Soccer first, as in the requested result.
SELECT CONVERT(VARCHAR(12), id) + '_'
       + CONVERT(VARCHAR(12), ROW_NUMBER() OVER ( PARTITION BY id ORDER BY userid , part , RN , sport )) AS id ,
       userid ,
       sport ,
       sportlevel1 ,
       sportlevel2
FROM ( SELECT 1 AS part ,
              RN ,
              id ,
              userid ,
              sport1 AS sport ,
              sportlevel1 ,
              CASE WHEN sport1 = sport2 THEN sportlevel2
                   ELSE NULL
              END AS sportlevel2
       FROM final
       WHERE sport1 IS NOT NULL
       UNION ALL
       SELECT 2 ,
              RN ,
              id ,
              userid ,
              sport2 ,
              NULL ,
              sportlevel2
       FROM final
       WHERE sport2 IS NOT NULL
         AND sport2 != COALESCE(sport1, '')
     ) FinalDataset
OPTION ( MAXRECURSION 1000 )
输出
存储过程代码
-- Demo fixture: (re)create #Data with the sample legacy rows used for testing
IF OBJECT_ID('Tempdb..#Data') IS NOT NULL
    DROP TABLE #Data
SELECT  T.id ,
        T.userid ,
        T.sport1 ,
        T.sportlevel1 ,
        T.sport2 ,
        T.sportlevel2
INTO    #Data
FROM    ( VALUES
            ( 1, 11, 'Baseball', 'Varsity', 'Baseball', 'Recreational'),
            ( 2, 22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball', 'Varsity'),
            ( 3, 33, 'Soccer', 'Varsity', 'Soccer,Track & Field', 'Recreational,Intramural'),
            ( 4, 44, NULL , NULL , 'Tennis', 'Varsity'),
            ( 5, 55, 'Volleyball', 'Varsity', NULL , NULL ),
            ( 6, 66, 'Baseball,Basketball', 'Varsity,Varsity', 'Soccer,Football', 'Varsity,Varsity'),
            ( 7, 77, 'Baseball,Basketball,Rowing', 'Varsity,Varsity,Varsity', 'Soccer,Football,Volleyball', 'Varsity,Varsity,Recreational')
        ) AS T ( id, userid, sport1, sportlevel1, sport2, sportlevel2 );
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
--- CODE BELOW CAN BE USED THRO STORED PROCEDURE, VIEW DOES NOT ALLOW TEMP TABLE USAGE ---
----------------------------------------------------------------------------------------------------
--Create temp table with sequence numbers 1..999
IF OBJECT_ID('Tempdb..#Tally') IS NOT NULL --<<<~this code not required in Stored procedure, can be deleted
    DROP TABLE #Tally --<<<~this code not required in Stored procedure, can be deleted
CREATE TABLE #Tally ( N INT PRIMARY KEY )
-- Set-based fill (one INSERT instead of a 999-iteration WHILE loop):
-- combine three digit lists into hundreds/tens/ones and drop the 0.
INSERT INTO #Tally ( N )
SELECT  h.d * 100 + t.d * 10 + o.d
FROM    ( VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9) ) AS o ( d )
        CROSS JOIN ( VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9) ) AS t ( d )
        CROSS JOIN ( VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9) ) AS h ( d )
WHERE   h.d * 100 + t.d * 10 + o.d >= 1
----------------------------------------------------------------------------------------------------
--split the sport1 field into one row per value and store it in #sprt1
--(RN = 1-based position of the value inside the original string)
IF OBJECT_ID('Tempdb..#sprt1') IS NOT NULL
    DROP TABLE #sprt1
SELECT  RN = ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY d.id, d.userid, T.N ) ,
        d.id ,
        d.userid ,
        sport1 = CASE WHEN CHARINDEX(',', d.sport1) > 0
                      THEN SUBSTRING(d.sport1, T.N,
                                     CHARINDEX(',', SUBSTRING(d.sport1 + ',', T.N, 100)) - 1)
                      ELSE d.sport1   -- single value (or NULL): keep as is
                 END
INTO    #sprt1
FROM    #data AS d
        INNER JOIN #Tally AS T
            -- N is a value start when character N of (',' + value) is a comma
            ON T.N <= COALESCE(LEN(d.sport1), 1)
           AND SUBSTRING(',' + COALESCE(d.sport1, ''), T.N, 1) = ','
----------------------------------------------------------------------------------------------------
--split the sportlevel1 field into one row per value and store it in #lvl1
--(RN = 1-based position of the value inside the original string)
IF OBJECT_ID('Tempdb..#lvl1') IS NOT NULL
    DROP TABLE #lvl1
SELECT  RN = ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY d.id, d.userid, T.N ) ,
        d.id ,
        d.userid ,
        sportlevel1 = CASE WHEN CHARINDEX(',', d.sportlevel1) > 0
                           THEN SUBSTRING(d.sportlevel1, T.N,
                                          CHARINDEX(',', SUBSTRING(d.sportlevel1 + ',', T.N, 100)) - 1)
                           ELSE d.sportlevel1   -- single value (or NULL): keep as is
                      END
INTO    #lvl1
FROM    #data AS d
        INNER JOIN #Tally AS T
            -- N is a value start when character N of (',' + value) is a comma
            ON T.N <= COALESCE(LEN(d.sportlevel1), 1)
           AND SUBSTRING(',' + COALESCE(d.sportlevel1, ''), T.N, 1) = ','
----------------------------------------------------------------------------------------------------
--split the sport2 field into one row per value and store it in #sprt2
--(RN = 1-based position of the value inside the original string)
IF OBJECT_ID('Tempdb..#sprt2') IS NOT NULL
    DROP TABLE #sprt2
SELECT  RN = ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY d.id, d.userid, T.N ) ,
        d.id ,
        d.userid ,
        sport2 = CASE WHEN CHARINDEX(',', d.sport2) > 0
                      THEN SUBSTRING(d.sport2, T.N,
                                     CHARINDEX(',', SUBSTRING(d.sport2 + ',', T.N, 100)) - 1)
                      ELSE d.sport2   -- single value (or NULL): keep as is
                 END
INTO    #sprt2
FROM    #data AS d
        INNER JOIN #Tally AS T
            -- N is a value start when character N of (',' + value) is a comma
            ON T.N <= COALESCE(LEN(d.sport2), 1)
           AND SUBSTRING(',' + COALESCE(d.sport2, ''), T.N, 1) = ','
----------------------------------------------------------------------------------------------------
--split the sportlevel2 field into one row per value and store it in #lvl2
--(RN = 1-based position of the value inside the original string)
IF OBJECT_ID('Tempdb..#lvl2') IS NOT NULL
    DROP TABLE #lvl2
SELECT  RN = ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY d.id, d.userid, T.N ) ,
        d.id ,
        d.userid ,
        sportlevel2 = CASE WHEN CHARINDEX(',', d.sportlevel2) > 0
                           THEN SUBSTRING(d.sportlevel2, T.N,
                                          CHARINDEX(',', SUBSTRING(d.sportlevel2 + ',', T.N, 100)) - 1)
                           ELSE d.sportlevel2   -- single value (or NULL): keep as is
                      END
INTO    #lvl2
FROM    #data AS d
        INNER JOIN #Tally AS T
            -- N is a value start when character N of (',' + value) is a comma
            ON T.N <= COALESCE(LEN(d.sportlevel2), 1)
           AND SUBSTRING(',' + COALESCE(d.sportlevel2, ''), T.N, 1) = ','
----------------------------------------------------------------------------------------------------
--final data set: put the four split results side by side, matched on id and
--on the position (RN) of the value; FULL JOINs keep unmatched positions
IF OBJECT_ID('Tempdb..#Final') IS NOT NULL
    DROP TABLE #Final
SELECT  RN          = COALESCE(S1.RN, L1.RN, S2.RN, L2.RN) ,
        id          = COALESCE(S1.id, L1.id, S2.id, L2.id) ,
        userid      = COALESCE(S1.userid, L1.userid, S2.userid, L2.userid) ,
        sport1      = S1.sport1 ,
        sportlevel1 = L1.sportlevel1 ,
        sport2      = S2.sport2 ,
        sportlevel2 = L2.sportlevel2
INTO    #Final
FROM    #sprt1 AS S1
        FULL OUTER JOIN #lvl1 AS L1
            ON L1.id = S1.id
           AND L1.RN = S1.RN
        FULL OUTER JOIN #sprt2 AS S2
            ON S2.id = COALESCE(S1.id, L1.id)
           AND S2.RN = COALESCE(S1.RN, L1.RN)
        FULL OUTER JOIN #lvl2 AS L2
            ON L2.id = COALESCE(S1.id, L1.id, S2.id)
           AND L2.RN = COALESCE(S1.RN, L1.RN, S2.RN)
----------------------------------------------------------------------------------------------------
--Final output query
-- part 1: sports from sport1 (with the sport2 level when the same sport sits
--         at the same position in sport2)
-- part 2: sports from sport2 that differ from the sport1 value at the same
--         position
-- The sub id follows the original position (RN) inside each part rather than
-- the alphabetical order of the sport name, so e.g. 'Soccer,Football' is
-- numbered Soccer first, as in the requested result.
SELECT  CONVERT(VARCHAR(12), id) + '_'
        + CONVERT(VARCHAR(12), ROW_NUMBER() OVER ( PARTITION BY id ORDER BY userid , part , RN , sport )) AS id ,
        userid ,
        sport ,
        sportlevel1 ,
        sportlevel2
FROM    ( SELECT 1 AS part ,
                 RN ,
                 id ,
                 userid ,
                 sport1 AS sport ,
                 sportlevel1 ,
                 CASE WHEN sport1 = sport2 THEN sportlevel2
                      ELSE NULL
                 END AS sportlevel2
          FROM   #final
          WHERE  sport1 IS NOT NULL
          UNION ALL
          SELECT 2 ,
                 RN ,
                 id ,
                 userid ,
                 sport2 ,
                 NULL ,
                 sportlevel2
          FROM   #final
          WHERE  sport2 IS NOT NULL
                 AND sport2 != COALESCE(sport1, '')
        ) FinalDataset
以下方法使用 CTE,但它们不是递归的。输出正是您想要的,并且可以处理 CSV 字段中的任意数量的值。我使用了一个基于 SQLCLR 的字符串拆分器,它在 SQL# 库中免费提供(我写的,但是 2 个 String_Split 函数在免费版本中)。如果您愿意,可以使用另一个拆分器,但只要确保它不是标量 T-SQL UDF.
请注意,对于“新的” ID 字段,我没有把原本有效的 INT 转换为 VARCHAR 再与该 UserID 的不同运动序号拼接,而是把它变成了 DECIMAL,并将运动序号放在小数点的右侧。这样在过滤和排序时应该比字符串字段更快。如果某人可能拥有超过 9 项运动,请将 / 10.0 更改为 / 100.0。
SET NOCOUNT ON;
-- Table variable holding the sample legacy rows used by the query below.
DECLARE @SampleData TABLE
(
    Id          INT NOT NULL,
    UserID      INT NOT NULL,
    Sport1      VARCHAR(500),
    SportLevel1 VARCHAR(500),
    Sport2      VARCHAR(500),
    SportLevel2 VARCHAR(500)
);
-- Load all seven sample rows with a single multi-row INSERT.
INSERT INTO @SampleData (Id, UserID, Sport1, SportLevel1, Sport2, SportLevel2)
VALUES (1, 11, 'Baseball', 'Varsity', 'Baseball', 'Recreational'),
       (2, 22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball', 'Varsity'),
       (3, 33, 'Soccer', 'Varsity', 'Soccer,Track & Field', 'Recreational,Intramural'),
       (4, 44, NULL, NULL, 'Tennis', 'Varsity'),
       (5, 55, 'Volleyball', 'Varsity', NULL, NULL),
       (6, 66, 'Baseball,Basketball', 'Varsity,Varsity', 'Soccer,Football', 'Varsity,Varsity'),
       (7, 77, 'Baseball,Basketball,Rowing', 'Varsity,Varsity,Varsity', 'Soccer,Football,Volleyball', 'Varsity,Varsity,Recreational');
---------------------------------------------------------------
-- Splits each of the four CSV columns with SQL#.String_Split4k (external
-- SQLCLR function; this query only relies on its SplitNum and SplitVal output
-- columns), pairs each sport with its level by SplitNum, and then merges the
-- sport1 and sport2 sides on the sport name.
-- NOTE(review): SplitNum is presumably the 1-based position of the value and
-- a NULL input presumably yields no rows -- confirm against the SQL# docs.
;WITH s1 AS
(
-- values of Sport1, one row per value
SELECT d1.Id, d1.UserID, v1.SplitNum, v1.SplitVal AS [Sport]
FROM @SampleData d1
CROSS APPLY SQL#.String_Split4k(d1.Sport1, N',', 1) v1
), sl1 AS
(
-- values of SportLevel1, one row per value
SELECT d2.Id, d2.UserID, v2.SplitNum, v2.SplitVal AS [SportLevel1]
FROM @SampleData d2
CROSS APPLY SQL#.String_Split4k(d2.SportLevel1, N',', 1) v2
), s2 AS
(
-- values of Sport2, one row per value
SELECT d3.Id, d3.UserID, v3.SplitNum, v3.SplitVal AS [Sport]
FROM @SampleData d3
CROSS APPLY SQL#.String_Split4k(d3.Sport2, N',', 1) v3
), sl2 AS
(
-- values of SportLevel2, one row per value
SELECT d4.Id, d4.UserID, v4.SplitNum, v4.SplitVal AS [SportLevel2]
FROM @SampleData d4
CROSS APPLY SQL#.String_Split4k(d4.SportLevel2, N',', 1) v4
)
-- The output [UserID] is UserID plus (sport number / 10.0): the per-user
-- sport number ends up to the right of the decimal point. The numbering puts
-- sports having both levels first, then sports with only a level 1, then the
-- rest, each group ordered by sport name.
SELECT COALESCE(s1.UserID, s2.UserID) +
(ROW_NUMBER() OVER (PARTITION BY COALESCE(s1.UserID, s2.UserID)
ORDER BY CASE
WHEN sl1.SportLevel1 IS NOT NULL
AND sl2.SportLevel2 IS NOT NULL THEN 1
WHEN sl1.SportLevel1 IS NOT NULL THEN 2
ELSE 3
END ASC,
COALESCE(s1.Sport, s2.Sport) ASC
) / 10.0
) AS [UserID],
COALESCE(s1.Sport, s2.Sport) AS [Sport],
sl1.SportLevel1,
sl2.SportLevel2
FROM s1
-- pair every Sport1 value with the SportLevel1 value at the same position
INNER JOIN sl1
ON sl1.Id = s1.Id
AND sl1.SplitNum = s1.SplitNum
-- the parenthesised join pairs Sport2 with SportLevel2 first; the FULL OUTER
-- JOIN then matches both sides on row Id and sport name, keeping sports that
-- occur on only one side
FULL OUTER JOIN (
s2
INNER JOIN sl2
ON sl2.Id = s2.Id
AND sl2.SplitNum = s2.SplitNum
)
ON s2.Id = s1.Id
AND s2.Sport = s1.Sport
-- result order: by user, sports that have a level 1 first, then by sport name
ORDER BY COALESCE(s1.UserID, s2.UserID) ASC,
CASE WHEN sl1.SportLevel1 IS NOT NULL THEN 1 ELSE 2 END ASC,
COALESCE(s1.Sport, s2.Sport) ASC;
我在 MS SQL Server 2008 中使用遗留表(legacy tables),需要创建一个视图(view),以新系统所需的方式显示数据。下面是遗留表。
Table
id userid sport1 sport1level sport2 sport2level
----------------------------------------------------------------------------------------------------------------------------------
1 11 Baseball Varsity Baseball Recreational
2 22 Baseball,Basketball Varsity,Junior Varsity Baseball Varsity
3 33 Soccer Varsity Soccer,Track & Field Recreational,Intramural
4 44 null null Tennis Varsity
5 55 Volleyball Varsity null null
6 66 Baseball,Basketball Varsity,Varsity Soccer,Football Varsity,Varsity
7 77 Baseball,Basketball,Rowing Varsity,Varsity,Varsity Soccer,Football,Volleyball Varsity,Varsity,Recreational
这是我们正在寻找的结果:
结果
id userid sport sportlevel1 sportlevel2
---------------------------------------------------------------------------------------
1_1 11 Baseball Varsity Recreational
2_1 22 Baseball Varsity Varsity
2_2 22 Basketball Junior Varsity null
3_1 33 Soccer Varsity Recreational
3_2 33 Track & Field null Intramural
4_1 44 Tennis null Varsity
5_1 55 Volleyball Varsity null
6_1 66 Baseball Varsity null
6_2 66 Basketball Varsity null
6_3 66 Soccer null Varsity
6_4 66 Football null Varsity
7_1 77 Baseball Varsity null
7_2 77 Basketball Varsity null
7_3 77 Rowing Varsity null
7_4 77 Soccer null Varsity
7_5 77 Football null Varsity
7_6 77 Volleyball null Recreational
主要注意事项:
- 原始 table 中可能包含超过 2 个逗号分隔的值(我添加了第 7 行来说明这一点)。
- 遗留 table 中的 id 是 int,但在新的 table 中不一定需要是这种类型。
- 您可能已经注意到,新 table 的 id 是 {original id}_{incremental sport count per user} 的串联。其中 {incremental sport count per user} 相当于一个 sub id,对应用户选择的每一项运动。例如:id = 2 的那一行(userid = 22)选择了 2 项不同(distinct)的运动:棒球和篮球,尽管棒球同时出现在两列中。
如果我必须创建辅助函数或其他任何东西,请告诉我。
如果您有任何疑问或需要更多信息,请告诉我。
请不要尝试询问为什么采用这种结构或尝试为新格式提供更好的结构。谢谢
不太优雅,但它完成了工作:
-- Splits the comma-separated sport / level columns of every legacy row into
-- one output row per distinct sport, carrying the level found in column
-- group 1 (sport1/sport1level) and in column group 2 (sport2/sport2level).
WITH
Data AS (
    -- Inline sample rows standing in for the legacy table.
    SELECT T.id, T.userid, T.sport1, T.sport1level, T.sport2, T.sport2level
    FROM (
        VALUES ( 1, 11, 'Baseball ', 'Varsity ', 'Baseball ', 'Recreational ' )
             , ( 2, 22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball ', 'Varsity ' )
             , ( 3, 33, 'Soccer ', 'Varsity ', 'Soccer,Track & Field', 'Recreational,Intramural' )
             , ( 4, 44, NULL, NULL, 'Tennis ', 'Varsity ' )
             , ( 5, 55, 'Volleyball ', 'Varsity ', NULL, NULL )
             , ( 6, 66, 'Baseball,Basketball', 'Varsity,Varsity ', 'Soccer,Football ', 'Varsity,Varsity ' )
             , ( 7, 77, 'Baseball,Football,Rugby,Wrestling', 'Varsity,Varsity,Varsity,Junior Varsity', 'Rugby', 'Recreational' )
    ) AS T(id, userid, sport1, sport1level, sport2, sport2level)
),
SplitValues AS (
    -- Anchor: level 0 carries the unsplit strings as "remainders". The
    -- substring logic lives only in the recursive member (to avoid repeating
    -- it), so level 0 rows must be ignored by consumers.
    SELECT
          id
        , userid
        , [level] = 0
        , sport1 = sport1
        , sport1level = sport1level
        , sport2 = sport2
        , sport2level = sport2level
        , sport1Remainder = sport1
        , sport1levelRemainder = sport1level
        , sport2Remainder = sport2
        , sport2levelRemainder = sport2level
    FROM Data
    UNION ALL
    -- Recursive member: peel the first value off each remainder.
    --  * value     = text before the first comma, or the whole remainder when
    --                CHARINDEX finds no comma (0 - 1 = -1 -> NULL -> LEN(...)).
    --  * remainder = text after the first comma, or NULL when there is no
    --                comma (0 + 1 = 1 -> NULLIF(..., 1) -> NULL start position).
    SELECT
          id
        , userid
        , [level] = [level] + 1
        , sport1 = SUBSTRING(sport1Remainder, 1, ISNULL(NULLIF(CHARINDEX(',', sport1Remainder) - 1, -1), LEN(sport1Remainder)))
        , sport1level = SUBSTRING(sport1levelRemainder, 1, ISNULL(NULLIF(CHARINDEX(',', sport1levelRemainder) - 1, -1), LEN(sport1levelRemainder)))
        , sport2 = SUBSTRING(sport2Remainder, 1, ISNULL(NULLIF(CHARINDEX(',', sport2Remainder) - 1, -1), LEN(sport2Remainder)))
        , sport2level = SUBSTRING(sport2levelRemainder, 1, ISNULL(NULLIF(CHARINDEX(',', sport2levelRemainder) - 1, -1), LEN(sport2levelRemainder)))
        , sport1Remainder = SUBSTRING(sport1Remainder, NULLIF(CHARINDEX(',', sport1Remainder) + 1, 1), LEN(sport1Remainder))
        , sport1levelRemainder = SUBSTRING(sport1levelRemainder, NULLIF(CHARINDEX(',', sport1levelRemainder) + 1, 1), LEN(sport1levelRemainder))
        , sport2Remainder = SUBSTRING(sport2Remainder, NULLIF(CHARINDEX(',', sport2Remainder) + 1, 1), LEN(sport2Remainder))
        , sport2levelRemainder = SUBSTRING(sport2levelRemainder, NULLIF(CHARINDEX(',', sport2levelRemainder) + 1, 1), LEN(sport2levelRemainder))
    FROM SplitValues
    -- Keep recursing while either sport column still has unprocessed text.
    WHERE sport1Remainder IS NOT NULL
       OR sport2Remainder IS NOT NULL
),
SplitRowsWithDifferentSport AS (
    -- part = 1: sports listed in sport1 (with the sport2 level when the same
    --           sport sits at the same position in sport2).
    SELECT id
         , userid
         , sport1
         , sport1level
         , sport1level2 = CASE WHEN sport1 = sport2 THEN sport2level END
         , part = 1
         , pos = [level]
    FROM SplitValues
    WHERE [level] <> 0
    UNION ALL
    -- part = 2: sports listed in sport2 that differ from the sport1 value at
    --           the same position; they only carry a level-2 value.
    SELECT id
         , userid
         , sport2
         , NULL
         , sport1level2 = sport2level
         , part = 2
         , pos = [level]
    FROM SplitValues
    WHERE ISNULL(sport1, '') <> sport2
      AND [level] <> 0
)
-- One row per (id, userid, sport); the GROUP BY merges a sport that appears
-- in both column groups at different positions.
-- The sub id is numbered deterministically: sports from sport1 first (in
-- their original position), then sports that only occur in sport2 (in their
-- original position). Ordering by S.id alone would be a tie for every row of
-- a user and leave the numbering up to the optimizer.
SELECT id = CAST(S.id AS VARCHAR(max)) + '_' +
            CAST(ROW_NUMBER() OVER (
                     PARTITION BY S.userid
                     ORDER BY S.id
                            , MIN(S.part)
                            , MIN(CASE WHEN S.part = 1 THEN S.pos END)
                            , MIN(S.pos)
                            , S.sport1
                 ) AS VARCHAR(max))
     , S.sport1
     , sport1level1 = MAX(S.sport1level)
     , sport1level2 = MAX(S.sport1level2)
FROM SplitRowsWithDifferentSport AS S
WHERE S.sport1 IS NOT NULL
GROUP BY S.ID, S.userid, S.sport1
ORDER BY id
编辑:修改了 SplitValues CTE,使单个列中可以包含多项运动。由于递归层数默认上限为 100,目前每行最多支持 99 项运动;如果需要更多,请添加 OPTION (MAXRECURSION 0) 以完全取消限制。
EDIT2: 添加了分组依据以摆脱多行的相同运动。
您期望的输出没有优化,因为它提供了一种在 SportLevel2 中产生空值的方法。您应该将每项运动和每个级别存储为单独的行,例如:
nid userid SportName SportLevel
1_1 11 Baseball Varsity
1_2 11 Baseball Recreational
2_1 22 Baseball Varsity
2_2 22 Baseball Varsity
3_1 33 Soccer Varsity
3_2 33 Soccer Recreational
3_3 33 Track & Field Recreation
4_2 44 Tennis Varsity
5_1 55 Volleyball Varsity
6_1 66 Baseball Varsity
6_2 66 Soccer Varsity
6_2 66 Basketball Varsity
6_3 66 Football Varsity
为此,您可以按如下方式使用 CTE:
-- Sample data: table variable mirroring the legacy table layout.
DECLARE @tmp TABLE(id INT IDENTITY(1,1), userid INT , sport1 VARCHAR(150), sport1level VARCHAR(150), sport2 VARCHAR(150), sport2level VARCHAR(150))
INSERT INTO @tmp (userid, sport1, sport1level, sport2, sport2level)
VALUES(11, 'Baseball', 'Varsity', 'Baseball', 'Recreational'),
(22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball', 'Varsity'),
(33, 'Soccer', 'Varsity', 'Soccer,Track & Field', 'Recreational,Intramural'),
(44, null, null, 'Tennis', 'Varsity'),
(55, 'Volleyball', 'Varsity', null, null),
(66, 'Baseball,Basketball', 'Varsity,Varsity', 'Soccer,Football', 'Varsity,Varsity')
-- Sports: one row per (sport, level) pair. String columns are cast to
-- VARCHAR(150) everywhere so that all anchor members and the recursive member
-- agree on the column types.
-- NOTE(review): sportid starts at 1 for sport1 and at 2 for sport2 and is
-- incremented per extra value, so nid is not guaranteed to be unique per id.
;WITH Sports AS
(
    --1) initial value
    -- a) no commas in sport1
    SELECT id, userid, 1 AS sportid,
           CAST(sport1 AS VARCHAR(150)) AS SportName,
           CAST(sport1level AS VARCHAR(150)) AS SportLevel,
           CAST(NULL AS VARCHAR(150)) AS SportNameRemainder,
           CAST(NULL AS VARCHAR(150)) AS SportLevelRemainder
    FROM @tmp
    WHERE CHARINDEX(',', sport1) = 0 AND CHARINDEX(',', sport1level) = 0
    UNION ALL
    -- b) no commas in sport2
    SELECT id, userid, 2 AS sportid,
           CAST(sport2 AS VARCHAR(150)) AS SportName,
           CAST(sport2level AS VARCHAR(150)) AS SportLevel,
           CAST(NULL AS VARCHAR(150)) AS SportNameRemainder,
           CAST(NULL AS VARCHAR(150)) AS SportLevelRemainder
    FROM @tmp
    WHERE CHARINDEX(',', sport2) = 0 AND CHARINDEX(',', sport2level) = 0
    UNION ALL
    -- c) commas in sport1: first value goes to SportName/SportLevel, the text
    --    after the first comma goes to the remainders
    SELECT id, userid, 1 AS sportid,
           CAST(LEFT(sport1, CHARINDEX(',', sport1) - 1) AS VARCHAR(150)) AS SportName,
           CAST(LEFT(sport1level, CHARINDEX(',', sport1level) - 1) AS VARCHAR(150)) AS SportLevel,
           CAST(SUBSTRING(sport1, CHARINDEX(',', sport1) + 1, 150) AS VARCHAR(150)) AS SportNameRemainder,
           -- was LEFT(...), which returned the head of the string instead of the tail
           CAST(SUBSTRING(sport1level, CHARINDEX(',', sport1level) + 1, 150) AS VARCHAR(150)) AS SportLevelRemainder
    FROM @tmp
    WHERE CHARINDEX(',', sport1) > 0 AND CHARINDEX(',', sport1level) > 0
    UNION ALL
    -- d) commas in sport2
    SELECT id, userid, 2 AS sportid,
           CAST(LEFT(sport2, CHARINDEX(',', sport2) - 1) AS VARCHAR(150)) AS SportName,
           CAST(LEFT(sport2level, CHARINDEX(',', sport2level) - 1) AS VARCHAR(150)) AS SportLevel,
           CAST(SUBSTRING(sport2, CHARINDEX(',', sport2) + 1, 150) AS VARCHAR(150)) AS SportNameRemainder,
           CAST(SUBSTRING(sport2level, CHARINDEX(',', sport2level) + 1, 150) AS VARCHAR(150)) AS SportLevelRemainder
    FROM @tmp
    WHERE CHARINDEX(',', sport2) > 0 AND CHARINDEX(',', sport2level) > 0
    UNION ALL
    --2) recursive part: peel the next value off the remainders; keeps going
    --   until the name remainder is exhausted, so more than two values per
    --   column are supported
    SELECT s.id, s.userid, s.sportid + 1 AS sportid,
           CAST(CASE WHEN CHARINDEX(',', s.SportNameRemainder) > 0
                     THEN LEFT(s.SportNameRemainder, CHARINDEX(',', s.SportNameRemainder) - 1)
                     ELSE s.SportNameRemainder
                END AS VARCHAR(150)) AS SportName,
           CAST(CASE WHEN CHARINDEX(',', s.SportLevelRemainder) > 0
                     THEN LEFT(s.SportLevelRemainder, CHARINDEX(',', s.SportLevelRemainder) - 1)
                     ELSE s.SportLevelRemainder
                END AS VARCHAR(150)) AS SportLevel,
           CAST(CASE WHEN CHARINDEX(',', s.SportNameRemainder) > 0
                     THEN SUBSTRING(s.SportNameRemainder, CHARINDEX(',', s.SportNameRemainder) + 1, 150)
                END AS VARCHAR(150)) AS SportNameRemainder,
           CAST(CASE WHEN CHARINDEX(',', s.SportLevelRemainder) > 0
                     THEN SUBSTRING(s.SportLevelRemainder, CHARINDEX(',', s.SportLevelRemainder) + 1, 150)
                END AS VARCHAR(150)) AS SportLevelRemainder
    FROM Sports AS s
    WHERE s.SportNameRemainder IS NOT NULL
)
-- '+' concatenation instead of CONCAT(): CONCAT is not available on SQL Server 2008.
SELECT CONVERT(VARCHAR(5), id) + '_' + CONVERT(VARCHAR(5), sportid) AS nid, userid, SportName, SportLevel
FROM Sports
ORDER BY id, userid, sportid
随时根据您的需要进行更改。
注意:我建议将 SportLevel 中的字符串值替换为对应的数值,并且不要把 id 与 SportLevel 拼接在一起。例如:Varsity 的值可以是 1,Recreational 的值是 2,依此类推。SportName 也应采用同样的逻辑。这样可能需要联接(JOIN)两个表中的数据。如果您需要帮助,请告诉我 ;)
试试这个:
-- Splits the four comma-separated columns with a numbers (tally) CTE, lines
-- the pieces up by their position (RN) and emits one row per sport.
WITH Data
AS ( -- Inline sample rows standing in for the legacy table.
     SELECT T.id, T.userid, T.sport1, T.sportlevel1, T.sport2, T.sportlevel2
     FROM ( VALUES
              ( 1, 11, 'Baseball', 'Varsity', 'Baseball', 'Recreational'),
              ( 2, 22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball', 'Varsity'),
              ( 3, 33, 'Soccer', 'Varsity', 'Soccer,Track & Field', 'Recreational,Intramural'),
              ( 4, 44, NULL , NULL , 'Tennis', 'Varsity'),
              ( 5, 55, 'Volleyball', 'Varsity', NULL , NULL ),
              ( 6, 66, 'Baseball,Basketball', 'Varsity,Varsity', 'Soccer,Football', 'Varsity,Varsity'),
              ( 7, 77, 'Baseball,Basketball,Rowing', 'Varsity,Varsity,Varsity', 'Soccer,Football,Volleyball', 'Varsity,Varsity,Recreational') )
          AS T ( id, userid, sport1, sportlevel1, sport2, sportlevel2 )
   ),
Tally
AS ( -- Numbers 1..101, used as candidate start positions inside the strings.
     SELECT n = 1
     UNION ALL
     SELECT n + 1
     FROM Tally
     WHERE n <= 100
   ),
-- Each of the four CTEs below splits one column. A tally number n qualifies
-- when character n of (',' + value) is a comma, i.e. n is the start of a
-- value; a NULL column still yields one row (n = 1) with a NULL value.
-- RN is the 1-based position of the value inside its column.
Sprt1
AS ( SELECT ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.n ) AS RN ,
            d.id ,
            d.userid ,
            CASE WHEN CHARINDEX(',', d.sport1) = 0 THEN d.sport1
                 ELSE SUBSTRING(d.sport1, T.n,
                                CHARINDEX(',', SUBSTRING(d.sport1 + ',', T.n, 100)) - 1)
            END AS sport1
     FROM Data AS d
     INNER JOIN Tally AS T
         ON COALESCE(LEN(d.sport1), 1) >= T.n
        AND SUBSTRING(',' + COALESCE(d.sport1, ''), T.n, 1) = ','
   ),
lvl1
AS ( SELECT ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.n ) AS RN ,
            d.id ,
            d.userid ,
            CASE WHEN CHARINDEX(',', d.sportlevel1) = 0 THEN d.sportlevel1
                 ELSE SUBSTRING(d.sportlevel1, T.n,
                                CHARINDEX(',', SUBSTRING(d.sportlevel1 + ',', T.n, 100)) - 1)
            END AS sportlevel1
     FROM Data AS d
     INNER JOIN Tally AS T
         ON COALESCE(LEN(d.sportlevel1), 1) >= T.n
        AND SUBSTRING(',' + COALESCE(d.sportlevel1, ''), T.n, 1) = ','
   ),
sprt2
AS ( SELECT ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.n ) AS RN ,
            d.id ,
            d.userid ,
            CASE WHEN CHARINDEX(',', d.sport2) = 0 THEN d.sport2
                 ELSE SUBSTRING(d.sport2, T.n,
                                CHARINDEX(',', SUBSTRING(d.sport2 + ',', T.n, 100)) - 1)
            END AS sport2
     FROM Data AS d
     INNER JOIN Tally AS T
         ON COALESCE(LEN(d.sport2), 1) >= T.n
        AND SUBSTRING(',' + COALESCE(d.sport2, ''), T.n, 1) = ','
   ),
lvl2
AS ( SELECT ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.n ) AS RN ,
            d.id ,
            d.userid ,
            CASE WHEN CHARINDEX(',', d.sportlevel2) = 0 THEN d.sportlevel2
                 ELSE SUBSTRING(d.sportlevel2, T.n,
                                CHARINDEX(',', SUBSTRING(d.sportlevel2 + ',', T.n, 100)) - 1)
            END AS sportlevel2
     FROM Data AS d
     INNER JOIN Tally AS T
         ON COALESCE(LEN(d.sportlevel2), 1) >= T.n
        AND SUBSTRING(',' + COALESCE(d.sportlevel2, ''), T.n, 1) = ','
   ),
final
AS ( -- Re-assemble the pieces side by side: same id, same position (RN).
     -- FULL JOINs keep positions that exist in only some of the columns.
     SELECT COALESCE(sprt1.RN, lvl1.RN, sprt2.RN, lvl2.RN) AS RN ,
            COALESCE(sprt1.id, lvl1.id, sprt2.id, lvl2.id) AS id ,
            COALESCE(sprt1.userid, lvl1.userid, sprt2.userid, lvl2.userid) AS userid ,
            sprt1.sport1 ,
            lvl1.sportlevel1 ,
            sprt2.sport2 ,
            lvl2.sportlevel2
     FROM sprt1
     FULL JOIN lvl1
         ON sprt1.id = lvl1.id
        AND sprt1.RN = lvl1.RN
     FULL JOIN sprt2
         ON COALESCE(sprt1.id, lvl1.id) = sprt2.id
        AND COALESCE(sprt1.RN, lvl1.RN) = sprt2.RN
     FULL JOIN lvl2
         ON COALESCE(sprt1.id, lvl1.id, sprt2.id) = lvl2.id
        AND COALESCE(sprt1.RN, lvl1.RN, sprt2.RN) = lvl2.RN
   )
-- Output: sports from sport1 (part 1) followed by sports from sport2 that
-- differ from the sport1 value at the same position (part 2).
-- The sub id follows the original position (RN) inside each part rather than
-- the alphabetical order of the sport name, so e.g. 'Soccer,Football' is
-- numbered Soccer first, as in the requested result.
SELECT CONVERT(VARCHAR(12), id) + '_'
       + CONVERT(VARCHAR(12), ROW_NUMBER() OVER ( PARTITION BY id ORDER BY userid , part , RN , sport )) AS id ,
       userid ,
       sport ,
       sportlevel1 ,
       sportlevel2
FROM ( SELECT 1 AS part ,
              RN ,
              id ,
              userid ,
              sport1 AS sport ,
              sportlevel1 ,
              CASE WHEN sport1 = sport2 THEN sportlevel2
                   ELSE NULL
              END AS sportlevel2
       FROM final
       WHERE sport1 IS NOT NULL
       UNION ALL
       SELECT 2 ,
              RN ,
              id ,
              userid ,
              sport2 ,
              NULL ,
              sportlevel2
       FROM final
       WHERE sport2 IS NOT NULL
         AND sport2 != COALESCE(sport1, '')
     ) FinalDataset
OPTION ( MAXRECURSION 1000 )
输出
存储过程代码
-- Demo fixture: (re)create #Data with the sample legacy rows used for testing
IF OBJECT_ID('Tempdb..#Data') IS NOT NULL
    DROP TABLE #Data
SELECT  T.id ,
        T.userid ,
        T.sport1 ,
        T.sportlevel1 ,
        T.sport2 ,
        T.sportlevel2
INTO    #Data
FROM    ( VALUES
            ( 1, 11, 'Baseball', 'Varsity', 'Baseball', 'Recreational'),
            ( 2, 22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball', 'Varsity'),
            ( 3, 33, 'Soccer', 'Varsity', 'Soccer,Track & Field', 'Recreational,Intramural'),
            ( 4, 44, NULL , NULL , 'Tennis', 'Varsity'),
            ( 5, 55, 'Volleyball', 'Varsity', NULL , NULL ),
            ( 6, 66, 'Baseball,Basketball', 'Varsity,Varsity', 'Soccer,Football', 'Varsity,Varsity'),
            ( 7, 77, 'Baseball,Basketball,Rowing', 'Varsity,Varsity,Varsity', 'Soccer,Football,Volleyball', 'Varsity,Varsity,Recreational')
        ) AS T ( id, userid, sport1, sportlevel1, sport2, sportlevel2 );
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
--- CODE BELOW CAN BE USED THRO STORED PROCEDURE, VIEW DOES NOT ALLOW TEMP TABLE USAGE ---
----------------------------------------------------------------------------------------------------
--Create temp table with sequence numbers 1..999
IF OBJECT_ID('Tempdb..#Tally') IS NOT NULL --<<<~this code not required in Stored procedure, can be deleted
    DROP TABLE #Tally --<<<~this code not required in Stored procedure, can be deleted
CREATE TABLE #Tally ( N INT PRIMARY KEY )
-- Set-based fill (one INSERT instead of a 999-iteration WHILE loop):
-- combine three digit lists into hundreds/tens/ones and drop the 0.
INSERT INTO #Tally ( N )
SELECT  h.d * 100 + t.d * 10 + o.d
FROM    ( VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9) ) AS o ( d )
        CROSS JOIN ( VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9) ) AS t ( d )
        CROSS JOIN ( VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9) ) AS h ( d )
WHERE   h.d * 100 + t.d * 10 + o.d >= 1
----------------------------------------------------------------------------------------------------
--split the sport1 field into one row per value and store it in #sprt1
--(RN = 1-based position of the value inside the original string)
IF OBJECT_ID('Tempdb..#sprt1') IS NOT NULL
    DROP TABLE #sprt1
SELECT  RN = ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY d.id, d.userid, T.N ) ,
        d.id ,
        d.userid ,
        sport1 = CASE WHEN CHARINDEX(',', d.sport1) > 0
                      THEN SUBSTRING(d.sport1, T.N,
                                     CHARINDEX(',', SUBSTRING(d.sport1 + ',', T.N, 100)) - 1)
                      ELSE d.sport1   -- single value (or NULL): keep as is
                 END
INTO    #sprt1
FROM    #data AS d
        INNER JOIN #Tally AS T
            -- N is a value start when character N of (',' + value) is a comma
            ON T.N <= COALESCE(LEN(d.sport1), 1)
           AND SUBSTRING(',' + COALESCE(d.sport1, ''), T.N, 1) = ','
----------------------------------------------------------------------------------------------------
-- Split sportlevel1 into one row per comma-separated element and store the result in #lvl1.
-- RN is the element's 1-based position inside its source row's list.
-- A NULL sportlevel1 still produces one row (RN = 1, sportlevel1 = NULL); an empty string produces none.
IF OBJECT_ID('Tempdb..#lvl1') IS NOT NULL
    DROP TABLE #lvl1;

SELECT  ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.N ) AS RN ,
        d.id ,
        d.userid ,
        -- The element that starts at position N runs up to the next comma; the appended ','
        -- terminates the last element. Searching the whole string from N (rather than a
        -- 100-character window) avoids a negative SUBSTRING length on long elements.
        SUBSTRING(d.sportlevel1, T.N, CHARINDEX(',', d.sportlevel1 + ',', T.N) - T.N) AS sportlevel1
INTO    #lvl1
FROM    #Data AS d
        INNER JOIN #Tally AS T
            -- N starts an element when the character just before it is a comma; the
            -- prepended ',' makes position 1 qualify as well (including for NULL input).
            ON COALESCE(LEN(d.sportlevel1), 1) >= T.N
           AND SUBSTRING(',' + COALESCE(d.sportlevel1, ''), T.N, 1) = ',';
----------------------------------------------------------------------------------------------------
-- Split sport2 into one row per comma-separated element and store the result in #sprt2.
-- RN is the element's 1-based position inside its source row's list.
-- A NULL sport2 still produces one row (RN = 1, sport2 = NULL); an empty string produces none.
IF OBJECT_ID('Tempdb..#sprt2') IS NOT NULL
    DROP TABLE #sprt2;

SELECT  ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.N ) AS RN ,
        d.id ,
        d.userid ,
        -- The element that starts at position N runs up to the next comma; the appended ','
        -- terminates the last element. Searching the whole string from N (rather than a
        -- 100-character window) avoids a negative SUBSTRING length on long elements.
        SUBSTRING(d.sport2, T.N, CHARINDEX(',', d.sport2 + ',', T.N) - T.N) AS sport2
INTO    #sprt2
FROM    #Data AS d
        INNER JOIN #Tally AS T
            -- N starts an element when the character just before it is a comma; the
            -- prepended ',' makes position 1 qualify as well (including for NULL input).
            ON COALESCE(LEN(d.sport2), 1) >= T.N
           AND SUBSTRING(',' + COALESCE(d.sport2, ''), T.N, 1) = ',';
----------------------------------------------------------------------------------------------------
-- Split sportlevel2 into one row per comma-separated element and store the result in #lvl2.
-- RN is the element's 1-based position inside its source row's list.
-- A NULL sportlevel2 still produces one row (RN = 1, sportlevel2 = NULL); an empty string produces none.
IF OBJECT_ID('Tempdb..#lvl2') IS NOT NULL
    DROP TABLE #lvl2;

SELECT  ROW_NUMBER() OVER ( PARTITION BY d.id, d.userid ORDER BY T.N ) AS RN ,
        d.id ,
        d.userid ,
        -- The element that starts at position N runs up to the next comma; the appended ','
        -- terminates the last element. Searching the whole string from N (rather than a
        -- 100-character window) avoids a negative SUBSTRING length on long elements.
        SUBSTRING(d.sportlevel2, T.N, CHARINDEX(',', d.sportlevel2 + ',', T.N) - T.N) AS sportlevel2
INTO    #lvl2
FROM    #Data AS d
        INNER JOIN #Tally AS T
            -- N starts an element when the character just before it is a comma; the
            -- prepended ',' makes position 1 qualify as well (including for NULL input).
            ON COALESCE(LEN(d.sportlevel2), 1) >= T.N
           AND SUBSTRING(',' + COALESCE(d.sportlevel2, ''), T.N, 1) = ',';
----------------------------------------------------------------------------------------------------
-- Final data set: line the four split tables up side by side in #Final.
-- Rows are paired on (id, RN), i.e. the n-th element of each list for the same source row.
-- FULL JOINs keep positions that exist in only some of the lists; the COALESCE chains
-- pick id / RN / userid from whichever side is present.
IF OBJECT_ID('Tempdb..#Final') IS NOT NULL
    DROP TABLE #Final;

SELECT  COALESCE(S1.RN, L1.RN, S2.RN, L2.RN) AS RN ,
        COALESCE(S1.id, L1.id, S2.id, L2.id) AS id ,
        COALESCE(S1.userid, L1.userid, S2.userid, L2.userid) AS userid ,
        S1.sport1 ,
        L1.sportlevel1 ,
        S2.sport2 ,
        L2.sportlevel2
INTO    #Final
FROM    #sprt1 AS S1
        FULL JOIN #lvl1 AS L1
            ON S1.id = L1.id
           AND S1.RN = L1.RN
        FULL JOIN #sprt2 AS S2
            ON COALESCE(S1.id, L1.id) = S2.id
           AND COALESCE(S1.RN, L1.RN) = S2.RN
        FULL JOIN #lvl2 AS L2
            ON COALESCE(S1.id, L1.id, S2.id) = L2.id
           AND COALESCE(S1.RN, L1.RN, S2.RN) = L2.RN;
----------------------------------------------------------------------------------------------------
-- Final output query: emit the composite id "<id>_<sequence>" plus sport and both levels.
-- The sequence restarts at 1 for every id and is ordered by userid, part, sport.
SELECT  CONVERT(VARCHAR(30), FinalDataset.id) + '_'
        + CONVERT(VARCHAR(30), ROW_NUMBER() OVER ( PARTITION BY FinalDataset.id
                                                   ORDER BY FinalDataset.userid ,
                                                            FinalDataset.part ,
                                                            FinalDataset.sport )) AS id ,
        FinalDataset.userid ,
        FinalDataset.sport ,
        FinalDataset.sportlevel1 ,
        FinalDataset.sportlevel2
FROM    (
          -- Part 1: every non-NULL sport1. sportlevel2 is carried over only when the
          -- sport2 on the same #Final row is the same sport.
          SELECT    1 AS part ,
                    id ,
                    userid ,
                    sport1 AS sport ,
                    sportlevel1 ,
                    CASE WHEN sport1 = sport2 THEN sportlevel2
                         ELSE NULL
                    END AS sportlevel2
          FROM      #Final
          WHERE     sport1 IS NOT NULL
          UNION ALL
          -- Part 2: every non-NULL sport2 that differs from the sport1 on the same #Final row.
          -- NOTE(review): the comparison is per row, so the same sport sitting at different
          -- list positions in sport1 and sport2 would be emitted by both parts - confirm
          -- whether the data can contain that.
          SELECT    2 AS part ,
                    id ,
                    userid ,
                    sport2 AS sport ,
                    NULL AS sportlevel1 ,
                    sportlevel2
          FROM      #Final
          WHERE     sport2 IS NOT NULL
                    AND sport2 <> COALESCE(sport1, '')
        ) AS FinalDataset;
以下方法使用了 CTE,但它们不是递归的。输出符合您的要求,并且可以处理 CSV 字段中任意数量的值。我使用了一个基于 SQLCLR 的字符串拆分器,它由 SQL# 库免费提供(该库是我编写的,其中 2 个 String_Split 函数包含在免费版本中)。如果您愿意,也可以使用其他拆分器,只要确保它不是标量 T-SQL UDF 即可。
请注意,对于“新的” ID 字段,我没有把有效的 INT 转换为 VARCHAR 以便与该 UserID 的不重复运动序号拼接,而是把它变成了 DECIMAL,并将不重复运动序号放在小数点的右边。这样在过滤和排序时应该比字符串字段更快。如果某个用户可能拥有超过 9 项运动,请将 / 10.0 更改为 / 100.0。
-- Sample fixture: table variable with the seven legacy rows used by the query that follows.
SET NOCOUNT ON;

DECLARE @SampleData TABLE
(
    Id          INT NOT NULL,
    UserID      INT NOT NULL,
    Sport1      VARCHAR(500),
    SportLevel1 VARCHAR(500),
    Sport2      VARCHAR(500),
    SportLevel2 VARCHAR(500)
);

-- All rows are loaded with one multi-row VALUES constructor and an explicit column list.
INSERT INTO @SampleData ( Id, UserID, Sport1, SportLevel1, Sport2, SportLevel2 )
VALUES
    ( 1, 11, 'Baseball', 'Varsity', 'Baseball', 'Recreational' ),
    ( 2, 22, 'Baseball,Basketball', 'Varsity,Junior Varsity', 'Baseball', 'Varsity' ),
    ( 3, 33, 'Soccer', 'Varsity', 'Soccer,Track & Field', 'Recreational,Intramural' ),
    ( 4, 44, NULL, NULL, 'Tennis', 'Varsity' ),
    ( 5, 55, 'Volleyball', 'Varsity', NULL, NULL ),
    ( 6, 66, 'Baseball,Basketball', 'Varsity,Varsity', 'Soccer,Football', 'Varsity,Varsity' ),
    ( 7, 77, 'Baseball,Basketball,Rowing', 'Varsity,Varsity,Varsity', 'Soccer,Football,Volleyball', 'Varsity,Varsity,Recreational' );
---------------------------------------------------------------
-- Split the four CSV columns with the external SQLCLR function SQL#.String_Split4k, pair each
-- sport with the level at the same list position, then merge the two sport columns by name.
-- Only the function's SplitNum and SplitVal output columns are relied on here.
;WITH sport1_items AS
(
    SELECT  src.Id, src.UserID, part.SplitNum, part.SplitVal AS [Sport]
    FROM    @SampleData AS src
    CROSS APPLY SQL#.String_Split4k(src.Sport1, N',', 1) AS part
),
level1_items AS
(
    SELECT  src.Id, src.UserID, part.SplitNum, part.SplitVal AS [SportLevel1]
    FROM    @SampleData AS src
    CROSS APPLY SQL#.String_Split4k(src.SportLevel1, N',', 1) AS part
),
sport2_items AS
(
    SELECT  src.Id, src.UserID, part.SplitNum, part.SplitVal AS [Sport]
    FROM    @SampleData AS src
    CROSS APPLY SQL#.String_Split4k(src.Sport2, N',', 1) AS part
),
level2_items AS
(
    SELECT  src.Id, src.UserID, part.SplitNum, part.SplitVal AS [SportLevel2]
    FROM    @SampleData AS src
    CROSS APPLY SQL#.String_Split4k(src.SportLevel2, N',', 1) AS part
),
-- Sport1 elements together with the SportLevel1 element at the same position.
first_column AS
(
    SELECT  s.Id, s.UserID, s.Sport, l.SportLevel1
    FROM    sport1_items AS s
    INNER JOIN level1_items AS l
            ON l.Id = s.Id
           AND l.SplitNum = s.SplitNum
),
-- Sport2 elements together with the SportLevel2 element at the same position.
second_column AS
(
    SELECT  s.Id, s.UserID, s.Sport, l.SportLevel2
    FROM    sport2_items AS s
    INNER JOIN level2_items AS l
            ON l.Id = s.Id
           AND l.SplitNum = s.SplitNum
)
-- A sport present in both columns for the same Id becomes one row carrying both levels;
-- a sport present in only one column keeps NULL for the other level.
-- The per-user sequence number is divided by 10.0 and added to UserID, so it lands in the
-- first decimal place; sequence numbers of 10 or more carry into the integer part.
-- NOTE(review): the sequence is folded into the [UserID] column and the result has no
-- separate Id column - confirm this is the intended output shape.
SELECT  COALESCE(c1.UserID, c2.UserID)
        + ( ROW_NUMBER() OVER ( PARTITION BY COALESCE(c1.UserID, c2.UserID)
                                ORDER BY CASE
                                             WHEN c1.SportLevel1 IS NOT NULL
                                              AND c2.SportLevel2 IS NOT NULL THEN 1
                                             WHEN c1.SportLevel1 IS NOT NULL THEN 2
                                             ELSE 3
                                         END ASC,
                                         COALESCE(c1.Sport, c2.Sport) ASC )
            / 10.0 ) AS [UserID],
        COALESCE(c1.Sport, c2.Sport) AS [Sport],
        c1.SportLevel1,
        c2.SportLevel2
FROM    first_column AS c1
FULL OUTER JOIN second_column AS c2
        ON c2.Id = c1.Id
       AND c2.Sport = c1.Sport
ORDER BY COALESCE(c1.UserID, c2.UserID) ASC,
         CASE WHEN c1.SportLevel1 IS NOT NULL THEN 1 ELSE 2 END ASC,
         COALESCE(c1.Sport, c2.Sport) ASC;