SQL 将多个多值列拆分为行
SQL Split Multiple Multivalue Columns into Rows
我有发送给我的数据,我需要对其进行规范化。数据在 sql table 中,但每一行都有多个多值列。示例如下:
ID fname lname projects projdates
1 John Doe projA;projB;projC 20150701;20150801;20150901
2 Jane Smith projD;;projC 20150701;;20150902
3 Lisa Anderson projB;projC 20150801;20150903
4 Nancy Johnson projB;projC;projE 20150601;20150822;20150904
5 Chris Edwards projA 20150905
也需要像这样:
ID fname lname projects projdates
1 John Doe projA 20150701
1 John Doe projB 20150801
1 John Doe projC 20150901
2 Jane Smith projD 20150701
2 Jane Smith projC 20150902
3 Lisa Anderson projB 20150801
3 Lisa Anderson projC 20150903
4 Nancy Johnson projB 20150601
4 Nancy Johnson projC 20150822
4 Nancy Johnson projE 20150904
5 Chris Edwards projA 20150905
我需要按 id、fname、lname 将其拆分为多行,并把 projects 和 projdates 解析为单独的记录。我找到了许多使用拆分函数的帖子,对 1 列可以正常工作,但对 2 列不行。当我同时拆分 2 列时,两列的拆分结果会相互交叉组合。例如对于 John Doe,projA 的记录会出现 3 次,每个 projdates 值各一次。我需要将每个多值项目记录只与其各自对应的项目日期关联,而不是与其他项目的日期关联。
有什么想法吗?
谢谢!
你没有说你期望的结果是什么,但这可能是一个很好的起点:
-- Sample data: each row carries two parallel ';'-delimited lists
-- (projects and projdates) that must be split and paired by position.
DECLARE @t TABLE (
    ID        int          NOT NULL,
    fname     varchar(17)  NOT NULL,
    lname     varchar(15)  NOT NULL,
    projects  varchar(76)  NOT NULL,
    projdates varchar(310) NOT NULL
);

INSERT INTO @t (ID, fname, lname, projects, projdates) VALUES
    (1, 'John',  'Doe',      'projA;projB;projC', '20150701;20150801;20150901'),
    (2, 'Jane',  'Smith',    'projD;;projC',      '20150701;;20150902'),
    (3, 'Lisa',  'Anderson', 'projB;projC',       '20150801;20150903'),
    (4, 'Nancy', 'Johnson',  'projB;projC;projE', '20150601;20150822;20150904'),
    (5, 'Chris', 'Edwards',  'projA',             '20150905');

-- Numbers: ad-hoc tally 1..N. Only the row count of master..spt_values is
-- used, never its values, so N is capped by however many rows that table holds.
WITH Numbers AS (
    SELECT ROW_NUMBER() OVER (ORDER BY Number) AS n
    FROM master..spt_values
),
-- Position of every ';' in projects, plus two sentinels per row:
-- 0 (just before the string) and LEN+1 (just after it), so that every
-- list item is bracketed by two consecutive positions.
ProjectPositions AS (
    SELECT t.ID, n.n
    FROM @t AS t
    INNER JOIN Numbers AS n
        ON SUBSTRING(t.projects, n.n, 1) = ';'
    UNION ALL
    SELECT ID, 0 FROM @t
    UNION ALL
    SELECT ID, LEN(projects) + 1 FROM @t
),
-- Number the positions left to right within each ID.
ProjectsNumbered AS (
    SELECT ID, n, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY n) AS rn
    FROM ProjectPositions
),
-- Pair each position with the next one: the item lives in [startat, endat).
-- rn doubles as the 1-based ordinal of the item inside the list.
ProjectPartitions AS (
    SELECT n1.ID, n1.n + 1 AS startat, n2.n AS endat, n1.rn
    FROM ProjectsNumbered AS n1
    INNER JOIN ProjectsNumbered AS n2
        ON  n1.ID = n2.ID
        AND n1.rn = n2.rn - 1
),
-- Same three steps for the projdates list.
ProDatePositions AS (
    SELECT t.ID, n.n
    FROM @t AS t
    INNER JOIN Numbers AS n
        ON SUBSTRING(t.projdates, n.n, 1) = ';'
    UNION ALL
    SELECT ID, 0 FROM @t
    UNION ALL
    SELECT ID, LEN(projdates) + 1 FROM @t
),
ProDateNumbered AS (
    SELECT ID, n, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY n) AS rn
    FROM ProDatePositions
),
ProDatePartitions AS (
    SELECT n1.ID, n1.n + 1 AS startat, n2.n AS endat, n1.rn
    FROM ProDateNumbered AS n1
    INNER JOIN ProDateNumbered AS n2
        ON  n1.ID = n2.ID
        AND n1.rn = n2.rn - 1
)
-- Align the k-th project with the k-th date of the same row via rn.
-- Because both joins are INNER, an item with no counterpart at the same
-- ordinal in the other list produces no output row.
SELECT
    t.ID,
    t.fname,
    t.lname,
    SUBSTRING(t.projects,  pp.startat,  pp.endat  - pp.startat)  AS project,
    SUBSTRING(t.projdates, pdp.startat, pdp.endat - pdp.startat) AS projdate
FROM @t AS t
INNER JOIN ProjectPartitions AS pp
    ON t.ID = pp.ID
INNER JOIN ProDatePartitions AS pdp
    ON  t.ID  = pdp.ID
    AND pp.rn = pdp.rn
-- Without an ORDER BY the row order is not guaranteed; sort by row and by
-- the item's ordinal so the output follows the original list order.
ORDER BY t.ID, pp.rn;
结果:
ID fname lname project projdate
----------- ----------------- --------------- ----------- ----------
1 John Doe projA 20150701
1 John Doe projB 20150801
1 John Doe projC 20150901
2 Jane Smith projD 20150701
2 Jane Smith
2 Jane Smith projC 20150902
3 Lisa Anderson projB 20150801
3 Lisa Anderson projC 20150903
4 Nancy Johnson projB 20150601
4 Nancy Johnson projC 20150822
4 Nancy Johnson projE 20150904
5 Chris Edwards projA 20150905
(不清楚您希望如何处理 ID 2 的“空”项目。)
工作原理:我们使用 ROW_NUMBER() 模拟了一个 Numbers(数字)表。我们查询的是 master 数据库中一个未公开文档的表(spt_values),但并没有使用该表中的任何实际值——只是因为已知它有很多行。如果你有一个真正的数字表,就可以跳过这个 CTE。
然后我们对两列分别执行相同的操作:将数字表与数据表连接,用它来找出 ; 字符在待拆分字符串中的位置。我们还额外添加了一对虚拟位置:位置 0(字符串开头之前)和字符串末尾之后的 1 个位置(即 LEN+1)。这就定义了 ProjectPositions 和 ProDatePositions。
接着我们使用另一个 ROW_NUMBER()(ProjectsNumbered、ProDateNumbered)对这些位置进行编号,然后利用该编号将相邻的行连接在一起(ProjectPartitions、ProDatePartitions)。最终结果是,我们计算出了应当从两个字符串中截取子字符串的起止位置。
最后,我们将这些 "partition" CTE 连接回原始数据表,并使用行号来确保来自两个独立字符串的分区信息彼此对齐。
尝试使用以下查询。
-- Split only the projdates list into rows by converting it to XML:
--   'a;b;c'  ->  <M>a</M><M>b</M><M>c</M>
-- and then shredding the <M> elements with nodes().
-- The projects column is returned unsplit, as in the original query.
-- NOTE: replace YourTable with the actual source table name.
-- NOTE: the CAST fails if the data contains XML special characters
--       such as '&' or '<'.
SELECT
    A.ID,
    A.fname,
    A.lname,
    A.projects,
    LTRIM(Split.a.value('.', 'VARCHAR(100)')) AS projdates
FROM (
    SELECT
        ID,
        fname,
        lname,
        projects,
        CAST('<M>' + REPLACE([projdates], ';', '</M><M>') + '</M>' AS XML) AS String
    FROM YourTable
) AS A
CROSS APPLY String.nodes('/M') AS Split(a);
试试这个你会得到你预期的输出。
谢谢。
如果您使用 Jeff Moden 的“DelimitedSplit8K”拆分器(我在此处将其重命名为 "fDelimitedSplit8K";参见图 21:最终的 "New" 拆分器代码,准备测试)来完成拆分的繁重工作,剩下的就相当简单了:使用 CROSS APPLY 和 WHERE 来得到正确的连接。
-- Re-runnable setup: drop the temp table if a previous run left it behind.
IF OBJECT_ID(N'tempdb..#tInputData') IS NOT NULL
    DROP TABLE #tInputData;

-- Input rows: Projects and ProjDates are parallel ';'-delimited lists.
CREATE TABLE #tInputData (
    ID INT
        PRIMARY KEY CLUSTERED -- Add IDENTITY if ID needs to be set at INSERT time
  , FName VARCHAR (30)
  , LName VARCHAR (30)
  , Projects VARCHAR (4000)
  , ProjDates VARCHAR (4000)
);

INSERT INTO #tInputData
    ( ID, FName, LName, Projects, ProjDates )
VALUES
    ( 1, 'John', 'Doe' , 'projA;projB;projC' , '20150701;20150801;20150901'),
    ( 2, 'Jane', 'Smith' , 'projD;;projC' , '20150701;;20150902'),
    ( 3, 'Lisa', 'Anderson' , 'projB;projC' , '20150801;20150903'),
    ( 4, 'Nancy', 'Johnson' , 'projB;projC;projE' , '20150601;20150822;20150904'),
    ( 5, 'Chris', 'Edwards' , 'projA' , '20150905');

-- Take a look at the INSERT results
SELECT ID, FName, LName, Projects, ProjDates
FROM #tInputData;

-- dbo.fDelimitedSplit8K is an external splitter function; as used here it
-- yields one row per list element with columns ItemNumber (ordinal) and Item.
WITH ResultSet AS
(
    SELECT
        InData.ID
      , InData.FName
      , InData.LName
      , ProjectList.ItemNumber AS ProjectID
      , ProjectList.Item AS Project
      , DateList.ItemNumber AS DateID
      , DateList.Item AS ProjDate
    FROM #tInputData AS InData
    CROSS APPLY dbo.fDelimitedSplit8K(InData.Projects, ';') AS ProjectList
    CROSS APPLY dbo.fDelimitedSplit8K(InData.ProjDates, ';') AS DateList
    -- Links projects and dates in left-to-right order: without this
    -- predicate the two APPLYs would yield every project/date combination.
    WHERE DateList.ItemNumber = ProjectList.ItemNumber
      -- Skip a pair only when BOTH the project and its date are empty strings
      -- (they are '' here, not NULL). A pair with just one side empty is kept
      -- so that a project without a date (or vice versa) is not silently lost.
      AND NOT (ProjectList.Item = '' AND DateList.Item = '')
)
SELECT
    ID
  , FName
  , LName
  , Project
  , ProjDate
FROM ResultSet
ORDER BY ID, Project;
结果
ID FName LName Project ProjDate
-- ----- -------- ------- --------
1 John Doe projA 20150701
1 John Doe projB 20150801
1 John Doe projC 20150901
2 Jane Smith projC 20150902
2 Jane Smith projD 20150701
3 Lisa Anderson projB 20150801
3 Lisa Anderson projC 20150903
4 Nancy Johnson projB 20150601
4 Nancy Johnson projC 20150822
4 Nancy Johnson projE 20150904
5 Chris Edwards projA 20150905
此算法处理等长的项目和日期列表。对于给定的行,如果一个列表比另一个列表短,则需要特别注意在适当的位置应用 NULL。
-- Cleanup: remove the temporary table #tInputData once the results have been inspected
DROP TABLE #tInputData
检查这个答案(数据库:SQL Server 2019)
-- Split each ';'-delimited Location_Type value into one row per element.
-- STRING_SPLIT returns a single-column table whose column is named "value".
SELECT
    A.name,
    A.Location_Type,
    b.value AS spltrows
FROM Location__c AS A
CROSS APPLY STRING_SPLIT(A.Location_Type, ';') AS b
WHERE A.Location_Type IS NOT NULL
-- Sort by the named column rather than by ordinal position, so the ordering
-- does not change if the select list is reordered.
ORDER BY A.name;
在 Location_Type 列中,我们有多个值,例如“Mailing;Shipping;Billing”
我有发送给我的数据,我需要对其进行规范化。数据在 sql table 中,但每一行都有多个多值列。示例如下:
ID fname lname projects projdates
1 John Doe projA;projB;projC 20150701;20150801;20150901
2 Jane Smith projD;;projC 20150701;;20150902
3 Lisa Anderson projB;projC 20150801;20150903
4 Nancy Johnson projB;projC;projE 20150601;20150822;20150904
5 Chris Edwards projA 20150905
也需要像这样:
ID fname lname projects projdates
1 John Doe projA 20150701
1 John Doe projB 20150801
1 John Doe projC 20150901
2 Jane Smith projD 20150701
2 Jane Smith projC 20150902
3 Lisa Anderson projB 20150801
3 Lisa Anderson projC 20150903
4 Nancy Johnson projB 20150601
4 Nancy Johnson projC 20150822
4 Nancy Johnson projE 20150904
5 Chris Edwards projA 20150905
我需要按 id、fname、lname 将其拆分为多行,并把 projects 和 projdates 解析为单独的记录。我找到了许多使用拆分函数的帖子,对 1 列可以正常工作,但对 2 列不行。当我同时拆分 2 列时,两列的拆分结果会相互交叉组合。例如对于 John Doe,projA 的记录会出现 3 次,每个 projdates 值各一次。我需要将每个多值项目记录只与其各自对应的项目日期关联,而不是与其他项目的日期关联。
有什么想法吗?
谢谢!
你没有说你期望的结果是什么,但这可能是一个很好的起点:
-- Sample data: each row carries two parallel ';'-delimited lists
-- (projects and projdates) that must be split and paired by position.
DECLARE @t TABLE (
    ID        int          NOT NULL,
    fname     varchar(17)  NOT NULL,
    lname     varchar(15)  NOT NULL,
    projects  varchar(76)  NOT NULL,
    projdates varchar(310) NOT NULL
);

INSERT INTO @t (ID, fname, lname, projects, projdates) VALUES
    (1, 'John',  'Doe',      'projA;projB;projC', '20150701;20150801;20150901'),
    (2, 'Jane',  'Smith',    'projD;;projC',      '20150701;;20150902'),
    (3, 'Lisa',  'Anderson', 'projB;projC',       '20150801;20150903'),
    (4, 'Nancy', 'Johnson',  'projB;projC;projE', '20150601;20150822;20150904'),
    (5, 'Chris', 'Edwards',  'projA',             '20150905');

-- Numbers: ad-hoc tally 1..N. Only the row count of master..spt_values is
-- used, never its values, so N is capped by however many rows that table holds.
WITH Numbers AS (
    SELECT ROW_NUMBER() OVER (ORDER BY Number) AS n
    FROM master..spt_values
),
-- Position of every ';' in projects, plus two sentinels per row:
-- 0 (just before the string) and LEN+1 (just after it), so that every
-- list item is bracketed by two consecutive positions.
ProjectPositions AS (
    SELECT t.ID, n.n
    FROM @t AS t
    INNER JOIN Numbers AS n
        ON SUBSTRING(t.projects, n.n, 1) = ';'
    UNION ALL
    SELECT ID, 0 FROM @t
    UNION ALL
    SELECT ID, LEN(projects) + 1 FROM @t
),
-- Number the positions left to right within each ID.
ProjectsNumbered AS (
    SELECT ID, n, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY n) AS rn
    FROM ProjectPositions
),
-- Pair each position with the next one: the item lives in [startat, endat).
-- rn doubles as the 1-based ordinal of the item inside the list.
ProjectPartitions AS (
    SELECT n1.ID, n1.n + 1 AS startat, n2.n AS endat, n1.rn
    FROM ProjectsNumbered AS n1
    INNER JOIN ProjectsNumbered AS n2
        ON  n1.ID = n2.ID
        AND n1.rn = n2.rn - 1
),
-- Same three steps for the projdates list.
ProDatePositions AS (
    SELECT t.ID, n.n
    FROM @t AS t
    INNER JOIN Numbers AS n
        ON SUBSTRING(t.projdates, n.n, 1) = ';'
    UNION ALL
    SELECT ID, 0 FROM @t
    UNION ALL
    SELECT ID, LEN(projdates) + 1 FROM @t
),
ProDateNumbered AS (
    SELECT ID, n, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY n) AS rn
    FROM ProDatePositions
),
ProDatePartitions AS (
    SELECT n1.ID, n1.n + 1 AS startat, n2.n AS endat, n1.rn
    FROM ProDateNumbered AS n1
    INNER JOIN ProDateNumbered AS n2
        ON  n1.ID = n2.ID
        AND n1.rn = n2.rn - 1
)
-- Align the k-th project with the k-th date of the same row via rn.
-- Because both joins are INNER, an item with no counterpart at the same
-- ordinal in the other list produces no output row.
SELECT
    t.ID,
    t.fname,
    t.lname,
    SUBSTRING(t.projects,  pp.startat,  pp.endat  - pp.startat)  AS project,
    SUBSTRING(t.projdates, pdp.startat, pdp.endat - pdp.startat) AS projdate
FROM @t AS t
INNER JOIN ProjectPartitions AS pp
    ON t.ID = pp.ID
INNER JOIN ProDatePartitions AS pdp
    ON  t.ID  = pdp.ID
    AND pp.rn = pdp.rn
-- Without an ORDER BY the row order is not guaranteed; sort by row and by
-- the item's ordinal so the output follows the original list order.
ORDER BY t.ID, pp.rn;
结果:
ID fname lname project projdate
----------- ----------------- --------------- ----------- ----------
1 John Doe projA 20150701
1 John Doe projB 20150801
1 John Doe projC 20150901
2 Jane Smith projD 20150701
2 Jane Smith
2 Jane Smith projC 20150902
3 Lisa Anderson projB 20150801
3 Lisa Anderson projC 20150903
4 Nancy Johnson projB 20150601
4 Nancy Johnson projC 20150822
4 Nancy Johnson projE 20150904
5 Chris Edwards projA 20150905
(不清楚您希望如何处理 ID 2 的“空”项目。)
工作原理:我们使用 ROW_NUMBER() 模拟了一个 Numbers(数字)表。我们查询的是 master 数据库中一个未公开文档的表(spt_values),但并没有使用该表中的任何实际值——只是因为已知它有很多行。如果你有一个真正的数字表,就可以跳过这个 CTE。
然后我们对两列分别执行相同的操作:将数字表与数据表连接,用它来找出 ; 字符在待拆分字符串中的位置。我们还额外添加了一对虚拟位置:位置 0(字符串开头之前)和字符串末尾之后的 1 个位置(即 LEN+1)。这就定义了 ProjectPositions 和 ProDatePositions。
接着我们使用另一个 ROW_NUMBER()(ProjectsNumbered、ProDateNumbered)对这些位置进行编号,然后利用该编号将相邻的行连接在一起(ProjectPartitions、ProDatePartitions)。最终结果是,我们计算出了应当从两个字符串中截取子字符串的起止位置。
最后,我们将这些 "partition" CTE 连接回原始数据表,并使用行号来确保来自两个独立字符串的分区信息彼此对齐。
尝试使用以下查询。
-- Split only the projdates list into rows by converting it to XML:
--   'a;b;c'  ->  <M>a</M><M>b</M><M>c</M>
-- and then shredding the <M> elements with nodes().
-- The projects column is returned unsplit, as in the original query.
-- NOTE: replace YourTable with the actual source table name.
-- NOTE: the CAST fails if the data contains XML special characters
--       such as '&' or '<'.
SELECT
    A.ID,
    A.fname,
    A.lname,
    A.projects,
    LTRIM(Split.a.value('.', 'VARCHAR(100)')) AS projdates
FROM (
    SELECT
        ID,
        fname,
        lname,
        projects,
        CAST('<M>' + REPLACE([projdates], ';', '</M><M>') + '</M>' AS XML) AS String
    FROM YourTable
) AS A
CROSS APPLY String.nodes('/M') AS Split(a);
试试这个你会得到你预期的输出。
谢谢。
如果您使用 Jeff Moden 的“DelimitedSplit8K”拆分器(我在此处将其重命名为 "fDelimitedSplit8K";参见图 21:最终的 "New" 拆分器代码,准备测试)来完成拆分的繁重工作,剩下的就相当简单了:使用 CROSS APPLY 和 WHERE 来得到正确的连接。
-- Re-runnable setup: drop the temp table if a previous run left it behind.
IF OBJECT_ID(N'tempdb..#tInputData') IS NOT NULL
    DROP TABLE #tInputData;

-- Input rows: Projects and ProjDates are parallel ';'-delimited lists.
CREATE TABLE #tInputData (
    ID INT
        PRIMARY KEY CLUSTERED -- Add IDENTITY if ID needs to be set at INSERT time
  , FName VARCHAR (30)
  , LName VARCHAR (30)
  , Projects VARCHAR (4000)
  , ProjDates VARCHAR (4000)
);

INSERT INTO #tInputData
    ( ID, FName, LName, Projects, ProjDates )
VALUES
    ( 1, 'John', 'Doe' , 'projA;projB;projC' , '20150701;20150801;20150901'),
    ( 2, 'Jane', 'Smith' , 'projD;;projC' , '20150701;;20150902'),
    ( 3, 'Lisa', 'Anderson' , 'projB;projC' , '20150801;20150903'),
    ( 4, 'Nancy', 'Johnson' , 'projB;projC;projE' , '20150601;20150822;20150904'),
    ( 5, 'Chris', 'Edwards' , 'projA' , '20150905');

-- Take a look at the INSERT results
SELECT ID, FName, LName, Projects, ProjDates
FROM #tInputData;

-- dbo.fDelimitedSplit8K is an external splitter function; as used here it
-- yields one row per list element with columns ItemNumber (ordinal) and Item.
WITH ResultSet AS
(
    SELECT
        InData.ID
      , InData.FName
      , InData.LName
      , ProjectList.ItemNumber AS ProjectID
      , ProjectList.Item AS Project
      , DateList.ItemNumber AS DateID
      , DateList.Item AS ProjDate
    FROM #tInputData AS InData
    CROSS APPLY dbo.fDelimitedSplit8K(InData.Projects, ';') AS ProjectList
    CROSS APPLY dbo.fDelimitedSplit8K(InData.ProjDates, ';') AS DateList
    -- Links projects and dates in left-to-right order: without this
    -- predicate the two APPLYs would yield every project/date combination.
    WHERE DateList.ItemNumber = ProjectList.ItemNumber
      -- Skip a pair only when BOTH the project and its date are empty strings
      -- (they are '' here, not NULL). A pair with just one side empty is kept
      -- so that a project without a date (or vice versa) is not silently lost.
      AND NOT (ProjectList.Item = '' AND DateList.Item = '')
)
SELECT
    ID
  , FName
  , LName
  , Project
  , ProjDate
FROM ResultSet
ORDER BY ID, Project;
结果
ID FName LName Project ProjDate
-- ----- -------- ------- --------
1 John Doe projA 20150701
1 John Doe projB 20150801
1 John Doe projC 20150901
2 Jane Smith projC 20150902
2 Jane Smith projD 20150701
3 Lisa Anderson projB 20150801
3 Lisa Anderson projC 20150903
4 Nancy Johnson projB 20150601
4 Nancy Johnson projC 20150822
4 Nancy Johnson projE 20150904
5 Chris Edwards projA 20150905
此算法处理等长的项目和日期列表。对于给定的行,如果一个列表比另一个列表短,则需要特别注意在适当的位置应用 NULL。
-- Cleanup: remove the temporary table #tInputData once the results have been inspected
DROP TABLE #tInputData
检查这个答案(数据库:SQL Server 2019)
-- Split each ';'-delimited Location_Type value into one row per element.
-- STRING_SPLIT returns a single-column table whose column is named "value".
SELECT
    A.name,
    A.Location_Type,
    b.value AS spltrows
FROM Location__c AS A
CROSS APPLY STRING_SPLIT(A.Location_Type, ';') AS b
WHERE A.Location_Type IS NOT NULL
-- Sort by the named column rather than by ordinal position, so the ordering
-- does not change if the select list is reordered.
ORDER BY A.name;
在 Location_Type 列中,我们有多个值,例如“Mailing;Shipping;Billing”