使用 SQL 从列单元格中删除重复的单词
Remove duplicate words from a column cell using SQL
从列单元格中删除重复的单词,例如:
数据列(单个字段)的内容例如:'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA'
但我希望得到如下结果:每个单词(例如 "BLACKHEATH")只出现一次,重复出现的单词都应删除:
BLACKHEATH COLCHESTER CO2 0AA
我可以通过应用程序代码做到这一点,但我不知道如何在 SQL Server 中用 SQL 删除重复的单词。我被要求必须用 SQL 来完成。
任何帮助将不胜感激。
如果顺序无关紧要,你可以很容易地做到:
-- Demo input: a space-separated value whose repeated words should be removed.
DECLARE @string VARCHAR(100) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA';

-- Split the value into one row per word, keep each distinct word once, and
-- join the survivors back together with single spaces.
-- NOTE(review): assumes dbo.SplitString(text, delimiter) exists in this database
-- and returns the word in a column named column1 -- confirm against your splitter.
SELECT
    @string AS Source,
    LTRIM((
        SELECT ' ' + words.column1 AS [text()]
        FROM (
            SELECT DISTINCT column1
            FROM dbo.SplitString(@string, ' ')
        ) AS words
        -- Without an ORDER BY the word order depends on the execution plan;
        -- sorting makes the (alphabetical) output deterministic.
        ORDER BY words.column1
        -- TYPE + .value() returns the raw text. Plain FOR XML PATH('') would
        -- entitize ampersands and angle brackets ('&' would come back as '&amp;').
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)')) AS UniqueWords;
它做了什么?
- dbo.SplitString 函数根据分隔符(在你的情况下是空格)将字符串拆分成多行。请参阅 this article 以找到最适合您需要的 SplitString 函数。
- DISTINCT 关键字去除重复项。
- 我们使用 FOR XML PATH('') 将它们重新连接在一起。
结果:
╔═══════════════════════════════════════════════╦═══════════════════════════════╗
║ Source ║ UniqueWords ║
╠═══════════════════════════════════════════════╬═══════════════════════════════╣
║ BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA ║ 0AA BLACKHEATH CO2 COLCHESTER ║
╚═══════════════════════════════════════════════╩═══════════════════════════════╝
如果需要保持原有顺序,就必须创建一个函数:按原始顺序存储各个单词(可以使用 ROW_NUMBER()),并为每个相同的单词单独编号(用于删除重复项),这样就能知道每个单词应处的位置。我没有去实现这个函数,但对于您的字符串,它应该输出如下结果:
╔═══════════╦═══════════╦════════════╗
║ WordOrder ║ TermOrder ║ Term ║
╠═══════════╬═══════════╬════════════╣
║ 1 ║ 1 ║ BLACKHEATH ║
║ 2 ║ 1 ║ 0AA ║
║ 3 ║ 2 ║ BLACKHEATH ║
║ 4 ║ 1 ║ COLCHESTER ║
║ 5 ║ 1 ║ CO2 ║
║ 6 ║ 2 ║ 0AA ║
╚═══════════╩═══════════╩════════════╝
该结果可以在如下查询中使用(@Splitted 即上面的表):
-- Rebuild the value from the first occurrence of every word, in original order.
-- NOTE(review): @string and @Splitted are declared outside this snippet; @Splitted
-- is expected to expose WordOrder / TermOrder / Term as in the table shown above.
SELECT
    @string AS Source,
    LTRIM((
        SELECT ' ' + Term AS [text()]
        FROM @Splitted
        WHERE TermOrder = 1        -- keep only the first occurrence of each term
        ORDER BY WordOrder         -- restore the original left-to-right order
        -- TYPE + .value() returns the raw text. Plain FOR XML PATH('') would
        -- entitize ampersands and angle brackets ('&' would come back as '&amp;').
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)')) AS UniqueWords;
它输出这个字符串:
╔═══════════════════════════════════════════════╦═══════════════════════════════╗
║ Source ║ UniqueWords ║
╠═══════════════════════════════════════════════╬═══════════════════════════════╣
║ BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA ║ BLACKHEATH 0AA COLCHESTER CO2 ║
╚═══════════════════════════════════════════════╩═══════════════════════════════╝
P.S. 为什么要求您在 SQL 中执行此操作?SQL Server 并不擅长处理文本数据,因此我建议在应用程序代码中完成,否则可能会影响性能。
-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of each word in its original position.
-- Emits two result sets: the split words (id, word), then the de-duplicated string.
DECLARE @text varchar(max) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA';
DECLARE @length int = LEN(@text);      -- LEN ignores trailing spaces
DECLARE @wordStart int = 1;            -- 1-based position where the next word may begin
DECLARE @wordEnd int;                  -- position just past the current word
DECLARE @newString varchar(max);

-- Allow a re-run in a session where an earlier run failed before its DROP TABLE.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL DROP TABLE #tmp;
CREATE TABLE #tmp (id int, word varchar(max));

-- Walk the string one word at a time by jumping from space to space.
WHILE @wordStart <= @length
BEGIN
    SET @wordEnd = CHARINDEX(' ', @text, @wordStart);
    IF @wordEnd = 0 SET @wordEnd = @length + 1;   -- no further space: word runs to the end

    -- Skip the empty tokens produced by leading or consecutive spaces.
    IF @wordEnd > @wordStart
        INSERT INTO #tmp (id, word)
        VALUES (
            -- id is the position of the space that ends the word, or the last
            -- character position for the final word; it only has to grow with position.
            CASE WHEN @wordEnd > @length THEN @length ELSE @wordEnd END,
            SUBSTRING(@text, @wordStart, @wordEnd - @wordStart)
        );

    SET @wordStart = @wordEnd + 1;
END;

-- Intermediate result: every word in original order.
SELECT id, word
FROM #tmp
ORDER BY id;

-- Keep the first occurrence of each word and join them in original order.
-- FOR XML PATH with an explicit ORDER BY is used because the result of
-- "SELECT @v = @v + col ... ORDER BY" concatenation is not guaranteed.
-- TYPE + .value() keeps ampersands and angle brackets from being entitized.
SET @newString = COALESCE(LTRIM((
    SELECT ' ' + ranked.word AS [text()]
    FROM (
        SELECT
            id,
            word,
            ROW_NUMBER() OVER (PARTITION BY word ORDER BY id) AS occurrence
        FROM #tmp
    ) AS ranked
    WHERE ranked.occurrence = 1
    ORDER BY ranked.id
    FOR XML PATH(''), TYPE
).value('.', 'varchar(max)')), '');

SELECT @newString;

DROP TABLE #tmp;
-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of each word in its original position.
-- Emits one result set with the de-duplicated string in column FromAddress.
DECLARE @text varchar(max) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA';
DECLARE @length int = LEN(@text);      -- LEN ignores trailing spaces
DECLARE @wordStart int = 1;            -- 1-based position where the next word may begin
DECLARE @wordEnd int;                  -- position just past the current word
DECLARE @newString varchar(max);

-- Allow a re-run in a session where an earlier run failed before its DROP TABLE.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL DROP TABLE #tmp;
CREATE TABLE #tmp (id int, word varchar(max));

-- Walk the string one word at a time by jumping from space to space.
WHILE @wordStart <= @length
BEGIN
    SET @wordEnd = CHARINDEX(' ', @text, @wordStart);
    IF @wordEnd = 0 SET @wordEnd = @length + 1;   -- no further space: word runs to the end

    -- Skip the empty tokens produced by leading or consecutive spaces.
    IF @wordEnd > @wordStart
        INSERT INTO #tmp (id, word)
        VALUES (
            -- id is the position of the space that ends the word, or the last
            -- character position for the final word; it only has to grow with position.
            CASE WHEN @wordEnd > @length THEN @length ELSE @wordEnd END,
            SUBSTRING(@text, @wordStart, @wordEnd - @wordStart)
        );

    SET @wordStart = @wordEnd + 1;
END;

-- Keep the first occurrence of each word and join them in original order.
-- FOR XML PATH with an explicit ORDER BY is used because the result of
-- "SELECT @v = @v + col ... ORDER BY" concatenation is not guaranteed.
-- TYPE + .value() keeps ampersands and angle brackets from being entitized.
SET @newString = COALESCE(LTRIM((
    SELECT ' ' + ranked.word AS [text()]
    FROM (
        SELECT
            id,
            word,
            ROW_NUMBER() OVER (PARTITION BY word ORDER BY id) AS occurrence
        FROM #tmp
    ) AS ranked
    WHERE ranked.occurrence = 1
    ORDER BY ranked.id
    FOR XML PATH(''), TYPE
).value('.', 'varchar(max)')), '');

SELECT @newString AS FromAddress;

DROP TABLE #tmp;
请删除 select 子句中的 ID。
从列单元格中删除重复的单词,例如:
数据列(单个字段)的内容例如:'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA'
但我希望得到如下结果:每个单词(例如 "BLACKHEATH")只出现一次,重复出现的单词都应删除:
BLACKHEATH COLCHESTER CO2 0AA
我可以通过应用程序代码做到这一点,但我不知道如何在 SQL Server 中用 SQL 删除重复的单词。我被要求必须用 SQL 来完成。任何帮助将不胜感激。
如果顺序无关紧要,你可以很容易地做到:
-- Demo input: a space-separated value whose repeated words should be removed.
DECLARE @string VARCHAR(100) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA';

-- Split the value into one row per word, keep each distinct word once, and
-- join the survivors back together with single spaces.
-- NOTE(review): assumes dbo.SplitString(text, delimiter) exists in this database
-- and returns the word in a column named column1 -- confirm against your splitter.
SELECT
    @string AS Source,
    LTRIM((
        SELECT ' ' + words.column1 AS [text()]
        FROM (
            SELECT DISTINCT column1
            FROM dbo.SplitString(@string, ' ')
        ) AS words
        -- Without an ORDER BY the word order depends on the execution plan;
        -- sorting makes the (alphabetical) output deterministic.
        ORDER BY words.column1
        -- TYPE + .value() returns the raw text. Plain FOR XML PATH('') would
        -- entitize ampersands and angle brackets ('&' would come back as '&amp;').
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)')) AS UniqueWords;
它做了什么?
- dbo.SplitString 函数根据分隔符(在你的情况下是空格)将字符串拆分成多行。请参阅 this article 以找到最适合您需要的 SplitString 函数。
- DISTINCT 关键字去除重复项。
- 我们使用 FOR XML PATH('') 将它们重新连接在一起。
结果:
╔═══════════════════════════════════════════════╦═══════════════════════════════╗
║ Source ║ UniqueWords ║
╠═══════════════════════════════════════════════╬═══════════════════════════════╣
║ BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA ║ 0AA BLACKHEATH CO2 COLCHESTER ║
╚═══════════════════════════════════════════════╩═══════════════════════════════╝
如果需要保持原有顺序,就必须创建一个函数:按原始顺序存储各个单词(可以使用 ROW_NUMBER()),并为每个相同的单词单独编号(用于删除重复项),这样就能知道每个单词应处的位置。我没有去实现这个函数,但对于您的字符串,它应该输出如下结果:
╔═══════════╦═══════════╦════════════╗
║ WordOrder ║ TermOrder ║ Term ║
╠═══════════╬═══════════╬════════════╣
║ 1 ║ 1 ║ BLACKHEATH ║
║ 2 ║ 1 ║ 0AA ║
║ 3 ║ 2 ║ BLACKHEATH ║
║ 4 ║ 1 ║ COLCHESTER ║
║ 5 ║ 1 ║ CO2 ║
║ 6 ║ 2 ║ 0AA ║
╚═══════════╩═══════════╩════════════╝
该结果可以在如下查询中使用(@Splitted 即上面的表):
-- Rebuild the value from the first occurrence of every word, in original order.
-- NOTE(review): @string and @Splitted are declared outside this snippet; @Splitted
-- is expected to expose WordOrder / TermOrder / Term as in the table shown above.
SELECT
    @string AS Source,
    LTRIM((
        SELECT ' ' + Term AS [text()]
        FROM @Splitted
        WHERE TermOrder = 1        -- keep only the first occurrence of each term
        ORDER BY WordOrder         -- restore the original left-to-right order
        -- TYPE + .value() returns the raw text. Plain FOR XML PATH('') would
        -- entitize ampersands and angle brackets ('&' would come back as '&amp;').
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)')) AS UniqueWords;
它输出这个字符串:
╔═══════════════════════════════════════════════╦═══════════════════════════════╗
║ Source ║ UniqueWords ║
╠═══════════════════════════════════════════════╬═══════════════════════════════╣
║ BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA ║ BLACKHEATH 0AA COLCHESTER CO2 ║
╚═══════════════════════════════════════════════╩═══════════════════════════════╝
P.S. 为什么要求您在 SQL 中执行此操作?SQL Server 并不擅长处理文本数据,因此我建议在应用程序代码中完成,否则可能会影响性能。
-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of each word in its original position.
-- Emits two result sets: the split words (id, word), then the de-duplicated string.
DECLARE @text varchar(max) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA';
DECLARE @length int = LEN(@text);      -- LEN ignores trailing spaces
DECLARE @wordStart int = 1;            -- 1-based position where the next word may begin
DECLARE @wordEnd int;                  -- position just past the current word
DECLARE @newString varchar(max);

-- Allow a re-run in a session where an earlier run failed before its DROP TABLE.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL DROP TABLE #tmp;
CREATE TABLE #tmp (id int, word varchar(max));

-- Walk the string one word at a time by jumping from space to space.
WHILE @wordStart <= @length
BEGIN
    SET @wordEnd = CHARINDEX(' ', @text, @wordStart);
    IF @wordEnd = 0 SET @wordEnd = @length + 1;   -- no further space: word runs to the end

    -- Skip the empty tokens produced by leading or consecutive spaces.
    IF @wordEnd > @wordStart
        INSERT INTO #tmp (id, word)
        VALUES (
            -- id is the position of the space that ends the word, or the last
            -- character position for the final word; it only has to grow with position.
            CASE WHEN @wordEnd > @length THEN @length ELSE @wordEnd END,
            SUBSTRING(@text, @wordStart, @wordEnd - @wordStart)
        );

    SET @wordStart = @wordEnd + 1;
END;

-- Intermediate result: every word in original order.
SELECT id, word
FROM #tmp
ORDER BY id;

-- Keep the first occurrence of each word and join them in original order.
-- FOR XML PATH with an explicit ORDER BY is used because the result of
-- "SELECT @v = @v + col ... ORDER BY" concatenation is not guaranteed.
-- TYPE + .value() keeps ampersands and angle brackets from being entitized.
SET @newString = COALESCE(LTRIM((
    SELECT ' ' + ranked.word AS [text()]
    FROM (
        SELECT
            id,
            word,
            ROW_NUMBER() OVER (PARTITION BY word ORDER BY id) AS occurrence
        FROM #tmp
    ) AS ranked
    WHERE ranked.occurrence = 1
    ORDER BY ranked.id
    FOR XML PATH(''), TYPE
).value('.', 'varchar(max)')), '');

SELECT @newString;

DROP TABLE #tmp;
-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of each word in its original position.
-- Emits one result set with the de-duplicated string in column FromAddress.
DECLARE @text varchar(max) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA';
DECLARE @length int = LEN(@text);      -- LEN ignores trailing spaces
DECLARE @wordStart int = 1;            -- 1-based position where the next word may begin
DECLARE @wordEnd int;                  -- position just past the current word
DECLARE @newString varchar(max);

-- Allow a re-run in a session where an earlier run failed before its DROP TABLE.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL DROP TABLE #tmp;
CREATE TABLE #tmp (id int, word varchar(max));

-- Walk the string one word at a time by jumping from space to space.
WHILE @wordStart <= @length
BEGIN
    SET @wordEnd = CHARINDEX(' ', @text, @wordStart);
    IF @wordEnd = 0 SET @wordEnd = @length + 1;   -- no further space: word runs to the end

    -- Skip the empty tokens produced by leading or consecutive spaces.
    IF @wordEnd > @wordStart
        INSERT INTO #tmp (id, word)
        VALUES (
            -- id is the position of the space that ends the word, or the last
            -- character position for the final word; it only has to grow with position.
            CASE WHEN @wordEnd > @length THEN @length ELSE @wordEnd END,
            SUBSTRING(@text, @wordStart, @wordEnd - @wordStart)
        );

    SET @wordStart = @wordEnd + 1;
END;

-- Keep the first occurrence of each word and join them in original order.
-- FOR XML PATH with an explicit ORDER BY is used because the result of
-- "SELECT @v = @v + col ... ORDER BY" concatenation is not guaranteed.
-- TYPE + .value() keeps ampersands and angle brackets from being entitized.
SET @newString = COALESCE(LTRIM((
    SELECT ' ' + ranked.word AS [text()]
    FROM (
        SELECT
            id,
            word,
            ROW_NUMBER() OVER (PARTITION BY word ORDER BY id) AS occurrence
        FROM #tmp
    ) AS ranked
    WHERE ranked.occurrence = 1
    ORDER BY ranked.id
    FOR XML PATH(''), TYPE
).value('.', 'varchar(max)')), '');

SELECT @newString AS FromAddress;

DROP TABLE #tmp;
请删除 select 子句中的 ID。