比较 SQL Server 中由特殊字符分隔的值
Comparing values separated by a special character in SQL Server
我有2列,比方说COLA
和COLB
,数据如下:
COLA | COLB
------------------+------------------
PLATE|SPOON|GLASS | PLATE|GLASS|SPOON
PLATE | SPOON
OIL|JUG|MAT | JUG|MAT
SPOON | SPOON
OIL|MAT | MAT|OIL
我正在尝试返回不匹配的行,而不考虑各项的顺序。
预期输出:
COLA | COLB
------------+--------
PLATE | SPOON
OIL|JUG|MAT | JUG|MAT
我已经尝试了类似下面的方法和很多方法,但都没有用。我对 SQL 部分了解不多:
-- Asker's original attempt, kept verbatim; it does not solve the problem.
-- '%COLB%' is a string literal, so the pattern looks for the text "COLB"
-- inside COLA rather than for the value stored in the COLB column.
SELECT *
FROM MYTABLE
WHERE COLA NOT LIKE '%COLB%'
一种方法是递归子查询:
-- Returns the (cola, colb) pairs of t whose pipe-delimited item lists differ,
-- ignoring item order.
--
-- The recursive CTE peels one item at a time off the front of cola:
--   parta = the item split off in this step (NULL on the anchor row)
--   resta = the part of cola that is still unsplit
-- seqnum keeps source rows with identical cola/colb text in separate groups.
with cte as (
      -- Anchor: one row per source row, nothing split off yet.
      select convert(varchar(max), null) as parta,
             convert(varchar(max), cola) as resta,
             cola, colb,
             row_number() over (order by (select null)) as seqnum
      from t
      union all
      -- Recursive step: appending '|' guarantees that charindex() finds a
      -- delimiter even when resta holds only the last item.
      select convert(varchar(max),
                     left(resta, charindex('|', resta + '|') - 1)
                    ) as parta,
             convert(varchar(max),
                     stuff(resta, 1, charindex('|', resta + '|'), '')
                    ) as resta,
             cola, colb, seqnum
      from cte
      where resta <> ''
     )
select cola, colb
from cte
where parta is not null   -- drop the anchor rows; they carry no item
group by seqnum, cola, colb
-- A row is a mismatch when some item of cola is missing from colb, or when
-- the two strings differ in length (a proxy for colb holding extra items).
-- charindex() is used instead of LIKE so that items containing %, _ or [
-- are compared literally rather than as wildcard patterns.
-- NOTE(review): the length proxy can be defeated by repeated items,
-- e.g. 'A|A' vs 'A|B' is not reported as a mismatch.
having sum(case when charindex(concat('|', parta, '|'), concat('|', colb, '|')) > 0 then 1 else 0 end) <> count(*) or
       len(cola) <> len(colb);
Here 是一个 db<>fiddle.
在支持字符串拆分和聚合的较新版本 SQL Server 中,这要简单得多。
您可以使用用户定义的函数来拆分每列中的分隔字符串,然后比较该函数的结果。
我选择使用适用于 SQL Server 的最快的字符串拆分函数之一(用于 2016 之前的版本;SQL Server 2016 起才有内置的字符串拆分函数)—— Jeff Moden 的 DelimitedSplit8K。
您可以在他的文章 Tally OH! An Improved SQL 8K “CSV Splitter” Function 中阅读所有相关内容。
首先,创建并填充示例 table(请在您以后的问题中为我们省去这一步):
-- Sample data: each row holds two pipe-delimited item lists to compare.
-- @T is a table variable, so it only exists for the batch that declares it.
DECLARE @T TABLE (
    ColA varchar(100),
    ColB varchar(100)
);

INSERT INTO @T (ColA, ColB)
SELECT v.ColA, v.ColB
FROM (VALUES
    ('PLATE|SPOON|GLASS', 'PLATE|GLASS|SPOON'),
    ('PLATE', 'SPOON'),
    ('OIL|JUG|MAT', 'JUG|MAT'),
    ('SPOON', 'SPOON'),
    ('OIL|MAT', 'MAT|OIL')
) AS v (ColA, ColB);
查询:
-- Returns the rows of @T whose two pipe-delimited lists do not contain the
-- same set of items. The derived table is the symmetric difference of the
-- two split results: items found only in ColA plus items found only in ColB.
-- A row qualifies as soon as that difference is non-empty.
SELECT src.ColA, src.ColB
FROM @T AS src
WHERE EXISTS (
    SELECT 1
    FROM (
        (
            SELECT a.Item FROM [dbo].[DelimitedSplit8K](src.ColA, '|') AS a
            EXCEPT
            SELECT b.Item FROM [dbo].[DelimitedSplit8K](src.ColB, '|') AS b
        )
        UNION ALL
        (
            SELECT b.Item FROM [dbo].[DelimitedSplit8K](src.ColB, '|') AS b
            EXCEPT
            SELECT a.Item FROM [dbo].[DelimitedSplit8K](src.ColA, '|') AS a
        )
    ) AS only_on_one_side
);
结果:
ColA ColB
PLATE SPOON
OIL|JUG|MAT JUG|MAT
这是一种依赖 xml 函数、将以分隔符(此处为 |)分隔的字符串拆分为行的方法。然后比较 cola 和 colb 中的值并返回差异
-- Lists every item that occurs a different number of times in cola than in
-- colb, one output row per (source row, item).
with numbered as (
    -- rnk tells apart source rows that carry identical cola/colb text.
    select row_number() over (order by (select null)) as rnk,
           cola,
           colb
    from t
),
as_xml as (
    -- Turn 'x|y|z' into <M>x</M><M>y</M><M>z</M> so .nodes() can shred it.
    select rnk,
           cola,
           colb,
           cast('<M>' + replace(cola, '|', '</M><M>') + '</M>' as xml) as xml_a,
           cast('<M>' + replace(colb, '|', '</M><M>') + '</M>' as xml) as xml_b
    from numbered
),
combo_data as (
    -- Items coming from cola: a_flag is set, b_flag stays NULL.
    select x.rnk,
           x.cola,
           x.colb,
           n.item.value('.', 'NVARCHAR(max)') as data,
           1 as a_flag,
           null as b_flag
    from as_xml as x
    cross apply x.xml_a.nodes('/M') as n(item)
    union all
    -- Items coming from colb: b_flag is set, a_flag stays NULL.
    select x.rnk,
           x.cola,
           x.colb,
           n.item.value('.', 'NVARCHAR(max)') as data,
           null as a_flag,
           1 as b_flag
    from as_xml as x
    cross apply x.xml_b.nodes('/M') as n(item)
)
-- count() skips NULLs, so each count is the number of times the item
-- appeared on that side.
select rnk,
       cola,
       colb,
       data,
       count(a_flag) as present_in_cola,
       count(b_flag) as present_in_colb
from combo_data
group by rnk, cola, colb, data
having count(a_flag) <> count(b_flag)
order by rnk, cola, colb, data
+-----+-------------+---------+-------+-----------------+-----------------+
| rnk | cola | colb | data | present_in_cola | present_in_colb |
+-----+-------------+---------+-------+-----------------+-----------------+
| 2 | PLATE | SPOON | PLATE | 1 | 0 |
| 2 | PLATE | SPOON | SPOON | 0 | 1 |
| 3 | OIL|JUG|MAT | JUG|MAT | OIL | 1 | 0 |
+-----+-------------+---------+-------+-----------------+-----------------+
db fiddle link
https://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=4c677e8628d2734305f2b7f1923e583b
通过使用 CROSS APPLY
将 2 个字符串转换为 XML 类型。
然后可以在 EXISTS
子句中比较那些 XML 的节点值。
示例数据:
-- Sample table: ID is an auto-generated key, ColA/ColB hold the
-- pipe-delimited item lists that the queries compare.
CREATE TABLE YourTable
(
    ID   INT IDENTITY(1,1) PRIMARY KEY,
    ColA NVARCHAR(100),
    ColB NVARCHAR(100)
);

INSERT INTO YourTable (ColA, ColB)
VALUES
    ('PLATE|SPOON|GLASS', 'PLATE|GLASS|SPOON'),
    ('PLATE', 'SPOON'),
    ('OIL|JUG|MAT', 'JUG|MAT'),
    ('SPOON', 'SPOON'),
    ('OIL|MAT', 'MAT|OIL');
GO
查询:
-- Returns the rows of YourTable whose two pipe-delimited lists do not hold
-- the same set of items.
SELECT t.*
FROM YourTable AS t
CROSS APPLY
(
    -- Wrap every item in an element so .nodes() can shred the list:
    -- 'x|y' becomes <a>x</a><a>y</a> (and <b>...</b> for ColB).
    SELECT
        CAST('<a>' + REPLACE(t.ColA, '|', '</a><a>') + '</a>' AS XML) AS XmlA,
        CAST('<b>' + REPLACE(t.ColB, '|', '</b><b>') + '</b>' AS XML) AS XmlB
) AS caX
WHERE EXISTS
(
    -- Items present in ColA but not in ColB.
    SELECT na.item.value('.', 'nvarchar(100)') AS val
    FROM caX.XmlA.nodes('/a') AS na(item)
    EXCEPT
    SELECT nb.item.value('.', 'nvarchar(100)') AS val
    FROM caX.XmlB.nodes('/b') AS nb(item)
)
OR EXISTS
(
    -- Items present in ColB but not in ColA.
    SELECT nb.item.value('.', 'nvarchar(100)') AS val
    FROM caX.XmlB.nodes('/b') AS nb(item)
    EXCEPT
    SELECT na.item.value('.', 'nvarchar(100)') AS val
    FROM caX.XmlA.nodes('/a') AS na(item)
);
结果:
ID | ColA | ColB
-: | :---------- | :------
2 | PLATE | SPOON
3 | OIL|JUG|MAT | JUG|MAT
在 db<>fiddle here
上测试
额外:
在 SQL Server 2016 及更高版本中,可以使用 STRING_SPLIT 函数。
那么这个较短的替代方案将起作用:
-- Variant built on STRING_SPLIT.
-- The FULL JOIN pairs up equal items from both lists; an item without a
-- partner leaves NULL on the other side, and any such row marks a mismatch.
SELECT t.*
FROM YourTable AS t
WHERE EXISTS
(
    SELECT 1
    FROM STRING_SPLIT(t.ColA, '|') AS item_a
    FULL OUTER JOIN STRING_SPLIT(t.ColB, '|') AS item_b
        ON item_b.value = item_a.value
    WHERE NOT (item_a.value IS NOT NULL AND item_b.value IS NOT NULL)
);
我有2列,比方说COLA
和COLB
,数据如下:
COLA | COLB
------------------+------------------
PLATE|SPOON|GLASS | PLATE|GLASS|SPOON
PLATE | SPOON
OIL|JUG|MAT | JUG|MAT
SPOON | SPOON
OIL|MAT | MAT|OIL
我正在尝试返回不匹配的行,而不考虑各项的顺序。
预期输出:
COLA | COLB
------------+--------
PLATE | SPOON
OIL|JUG|MAT | JUG|MAT
我已经尝试了类似下面的方法和很多方法,但都没有用。我对 SQL 部分了解不多:
-- Asker's original attempt, kept verbatim; it does not solve the problem.
-- '%COLB%' is a string literal, so the pattern looks for the text "COLB"
-- inside COLA rather than for the value stored in the COLB column.
SELECT *
FROM MYTABLE
WHERE COLA NOT LIKE '%COLB%'
一种方法是递归子查询:
-- Returns the (cola, colb) pairs of t whose pipe-delimited item lists differ,
-- ignoring item order.
--
-- The recursive CTE peels one item at a time off the front of cola:
--   parta = the item split off in this step (NULL on the anchor row)
--   resta = the part of cola that is still unsplit
-- seqnum keeps source rows with identical cola/colb text in separate groups.
with cte as (
      -- Anchor: one row per source row, nothing split off yet.
      select convert(varchar(max), null) as parta,
             convert(varchar(max), cola) as resta,
             cola, colb,
             row_number() over (order by (select null)) as seqnum
      from t
      union all
      -- Recursive step: appending '|' guarantees that charindex() finds a
      -- delimiter even when resta holds only the last item.
      select convert(varchar(max),
                     left(resta, charindex('|', resta + '|') - 1)
                    ) as parta,
             convert(varchar(max),
                     stuff(resta, 1, charindex('|', resta + '|'), '')
                    ) as resta,
             cola, colb, seqnum
      from cte
      where resta <> ''
     )
select cola, colb
from cte
where parta is not null   -- drop the anchor rows; they carry no item
group by seqnum, cola, colb
-- A row is a mismatch when some item of cola is missing from colb, or when
-- the two strings differ in length (a proxy for colb holding extra items).
-- charindex() is used instead of LIKE so that items containing %, _ or [
-- are compared literally rather than as wildcard patterns.
-- NOTE(review): the length proxy can be defeated by repeated items,
-- e.g. 'A|A' vs 'A|B' is not reported as a mismatch.
having sum(case when charindex(concat('|', parta, '|'), concat('|', colb, '|')) > 0 then 1 else 0 end) <> count(*) or
       len(cola) <> len(colb);
Here 是一个 db<>fiddle.
在支持字符串拆分和聚合的较新版本 SQL Server 中,这要简单得多。
您可以使用用户定义的函数来拆分每列中的分隔字符串,然后比较该函数的结果。
我选择使用适用于 SQL Server 的最快的字符串拆分函数之一(用于 2016 之前的版本;SQL Server 2016 起才有内置的字符串拆分函数)—— Jeff Moden 的 DelimitedSplit8K。您可以在他的文章 Tally OH! An Improved SQL 8K “CSV Splitter” Function 中阅读所有相关内容。
首先,创建并填充示例表(请在您以后的问题中为我们省去这一步):
-- Sample data: each row holds two pipe-delimited item lists to compare.
-- @T is a table variable, so it only exists for the batch that declares it.
DECLARE @T TABLE (
    ColA varchar(100),
    ColB varchar(100)
);

INSERT INTO @T (ColA, ColB)
SELECT v.ColA, v.ColB
FROM (VALUES
    ('PLATE|SPOON|GLASS', 'PLATE|GLASS|SPOON'),
    ('PLATE', 'SPOON'),
    ('OIL|JUG|MAT', 'JUG|MAT'),
    ('SPOON', 'SPOON'),
    ('OIL|MAT', 'MAT|OIL')
) AS v (ColA, ColB);
查询:
-- Returns the rows of @T whose two pipe-delimited lists do not contain the
-- same set of items. The derived table is the symmetric difference of the
-- two split results: items found only in ColA plus items found only in ColB.
-- A row qualifies as soon as that difference is non-empty.
SELECT src.ColA, src.ColB
FROM @T AS src
WHERE EXISTS (
    SELECT 1
    FROM (
        (
            SELECT a.Item FROM [dbo].[DelimitedSplit8K](src.ColA, '|') AS a
            EXCEPT
            SELECT b.Item FROM [dbo].[DelimitedSplit8K](src.ColB, '|') AS b
        )
        UNION ALL
        (
            SELECT b.Item FROM [dbo].[DelimitedSplit8K](src.ColB, '|') AS b
            EXCEPT
            SELECT a.Item FROM [dbo].[DelimitedSplit8K](src.ColA, '|') AS a
        )
    ) AS only_on_one_side
);
结果:
ColA ColB
PLATE SPOON
OIL|JUG|MAT JUG|MAT
这是一种依赖 xml 函数、将以分隔符(此处为 |)分隔的字符串拆分为行的方法。然后比较 cola 和 colb 中的值并返回差异
-- Lists every item that occurs a different number of times in cola than in
-- colb, one output row per (source row, item).
with numbered as (
    -- rnk tells apart source rows that carry identical cola/colb text.
    select row_number() over (order by (select null)) as rnk,
           cola,
           colb
    from t
),
as_xml as (
    -- Turn 'x|y|z' into <M>x</M><M>y</M><M>z</M> so .nodes() can shred it.
    select rnk,
           cola,
           colb,
           cast('<M>' + replace(cola, '|', '</M><M>') + '</M>' as xml) as xml_a,
           cast('<M>' + replace(colb, '|', '</M><M>') + '</M>' as xml) as xml_b
    from numbered
),
combo_data as (
    -- Items coming from cola: a_flag is set, b_flag stays NULL.
    select x.rnk,
           x.cola,
           x.colb,
           n.item.value('.', 'NVARCHAR(max)') as data,
           1 as a_flag,
           null as b_flag
    from as_xml as x
    cross apply x.xml_a.nodes('/M') as n(item)
    union all
    -- Items coming from colb: b_flag is set, a_flag stays NULL.
    select x.rnk,
           x.cola,
           x.colb,
           n.item.value('.', 'NVARCHAR(max)') as data,
           null as a_flag,
           1 as b_flag
    from as_xml as x
    cross apply x.xml_b.nodes('/M') as n(item)
)
-- count() skips NULLs, so each count is the number of times the item
-- appeared on that side.
select rnk,
       cola,
       colb,
       data,
       count(a_flag) as present_in_cola,
       count(b_flag) as present_in_colb
from combo_data
group by rnk, cola, colb, data
having count(a_flag) <> count(b_flag)
order by rnk, cola, colb, data
+-----+-------------+---------+-------+-----------------+-----------------+
| rnk | cola | colb | data | present_in_cola | present_in_colb |
+-----+-------------+---------+-------+-----------------+-----------------+
| 2 | PLATE | SPOON | PLATE | 1 | 0 |
| 2 | PLATE | SPOON | SPOON | 0 | 1 |
| 3 | OIL|JUG|MAT | JUG|MAT | OIL | 1 | 0 |
+-----+-------------+---------+-------+-----------------+-----------------+
db fiddle link https://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=4c677e8628d2734305f2b7f1923e583b
通过使用 CROSS APPLY
将 2 个字符串转换为 XML 类型。
然后可以在 EXISTS
子句中比较那些 XML 的节点值。
示例数据:
-- Sample table: ID is an auto-generated key, ColA/ColB hold the
-- pipe-delimited item lists that the queries compare.
CREATE TABLE YourTable
(
    ID   INT IDENTITY(1,1) PRIMARY KEY,
    ColA NVARCHAR(100),
    ColB NVARCHAR(100)
);

INSERT INTO YourTable (ColA, ColB)
VALUES
    ('PLATE|SPOON|GLASS', 'PLATE|GLASS|SPOON'),
    ('PLATE', 'SPOON'),
    ('OIL|JUG|MAT', 'JUG|MAT'),
    ('SPOON', 'SPOON'),
    ('OIL|MAT', 'MAT|OIL');
-- GO is a client-side batch separator and has to sit on a line of its own.
GO
查询:
-- Returns the rows of YourTable whose two pipe-delimited lists do not hold
-- the same set of items.
SELECT t.*
FROM YourTable AS t
CROSS APPLY
(
    -- Wrap every item in an element so .nodes() can shred the list:
    -- 'x|y' becomes <a>x</a><a>y</a> (and <b>...</b> for ColB).
    SELECT
        CAST('<a>' + REPLACE(t.ColA, '|', '</a><a>') + '</a>' AS XML) AS XmlA,
        CAST('<b>' + REPLACE(t.ColB, '|', '</b><b>') + '</b>' AS XML) AS XmlB
) AS caX
WHERE EXISTS
(
    -- Items present in ColA but not in ColB.
    SELECT na.item.value('.', 'nvarchar(100)') AS val
    FROM caX.XmlA.nodes('/a') AS na(item)
    EXCEPT
    SELECT nb.item.value('.', 'nvarchar(100)') AS val
    FROM caX.XmlB.nodes('/b') AS nb(item)
)
OR EXISTS
(
    -- Items present in ColB but not in ColA.
    SELECT nb.item.value('.', 'nvarchar(100)') AS val
    FROM caX.XmlB.nodes('/b') AS nb(item)
    EXCEPT
    SELECT na.item.value('.', 'nvarchar(100)') AS val
    FROM caX.XmlA.nodes('/a') AS na(item)
);
结果:
ID | ColA        | ColB
-: | :---------- | :------
 2 | PLATE       | SPOON
 3 | OIL|JUG|MAT | JUG|MAT
在 db<>fiddle here 上测试
额外:
在 Sql Server 2016 及更高版本中,STRING_SPLIT 功能可用。
那么这个较短的替代方案将起作用:
-- Variant built on STRING_SPLIT.
-- The FULL JOIN pairs up equal items from both lists; an item without a
-- partner leaves NULL on the other side, and any such row marks a mismatch.
SELECT t.*
FROM YourTable AS t
WHERE EXISTS
(
    SELECT 1
    FROM STRING_SPLIT(t.ColA, '|') AS item_a
    FULL OUTER JOIN STRING_SPLIT(t.ColB, '|') AS item_b
        ON item_b.value = item_a.value
    WHERE NOT (item_a.value IS NOT NULL AND item_b.value IS NOT NULL)
);