特殊字符 - Sql
Special Characters - Sql
如何在 SQL Server 的列中查找特殊字符?
我有电子邮件列表,我必须找到特殊的字符,例如下面的示例
**Email**
JóhnSnow@gmail.com
Khãlessi@gmail.com
As you can see above, '~' and '´' appear as special characters. Other characters, such as '..' or anything else, might appear as well.
我正在使用 SQL Server 2012,
有人有解决问题的建议吗?
要提取特殊字符,您首先需要将字符串按字符拆分成行,这样就可以单独检查每一个字符,这可以借助数字表(numbers table)来完成。如果您没有现成的数字表,也很容易即时创建:
-- Build an on-the-fly numbers (tally) table holding 1..10000 without reading any base table.
WITH Ten (N) AS
(   -- 10 seed rows; the value itself is irrelevant, only the row count matters
    SELECT v.N
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS v (N)
),
Hundred (N) AS
(   -- 10 x 10 = 100 rows
    SELECT 1
    FROM Ten AS a
    CROSS JOIN Ten AS b
),
TenThousand (N) AS
(   -- 100 x 100 = 10,000 rows
    SELECT 1
    FROM Hundred AS a
    CROSS JOIN Hundred AS b
),
Numbers (Number) AS
(   -- Every N is 1, so the ordering is arbitrary, but the numbering always yields 1..10000
    SELECT ROW_NUMBER() OVER (ORDER BY N)
    FROM TenThousand
)
SELECT Number
FROM Numbers;
这会生成 1 到 10000 的数字列表(更多相关信息见原答案中的链接)。
然后您可以使用条件 Number <= LEN(Email) 将其联接到您的数据,以确保为电子邮件中的每个字符(包括最后一个字符)返回一行,然后使用 SUBSTRING() 提取位置 n 处的字符:
-- Sample data: two emails containing accented characters and one plain ASCII email.
DECLARE @T TABLE (ID INT IDENTITY, Email NVARCHAR(255));
INSERT @T (Email)
VALUES (N'JóhnSnów@gmail.com'), (N'Khãlessi@gmail.com'), (N'NedStark@gmail.com');

-- Numbers 1..10000 generated on the fly (10 x 10 x 100 x 100 via cross joins);
-- 10000 comfortably covers the NVARCHAR(255) column.
WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (N)),
N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
Numbers (Number) AS (SELECT ROW_NUMBER() OVER(ORDER BY N) FROM N3)
-- One output row per character of each email.
SELECT  t.ID,
        t.Email,
        Character = SUBSTRING(t.Email, n.Number, 1)
FROM    @T AS t
        INNER JOIN Numbers AS n
            -- <= (not <) so the final character is returned too.
            -- LEN ignores trailing spaces, so trailing blanks are not split out.
            ON n.Number <= LEN(t.Email)
-- Sort by position as well so the characters come back in reading order.
ORDER BY t.ID, n.Number;
给出:
ID Email Character
-----------------------------
1 JóhnSnów@gmail.com J
1 JóhnSnów@gmail.com ó
1 JóhnSnów@gmail.com h
1 JóhnSnów@gmail.com n
1 JóhnSnów@gmail.com S
1 JóhnSnów@gmail.com n
1 JóhnSnów@gmail.com ó
1 JóhnSnów@gmail.com w
.....
然后您可以提取特殊字符,方法是使用排序规则 SQL_Latin1_General_Cp1251_CS_AS 将每个字符转换为非 Unicode 类型(CHAR(1)),并检查转换后的结果是否仍与原始字符相同:
-- Sample data: two emails containing accented characters and one plain ASCII email.
DECLARE @T TABLE (ID INT IDENTITY, Email NVARCHAR(255));
INSERT @T (Email)
VALUES (N'JóhnSnów@gmail.com'), (N'Khãlessi@gmail.com'), (N'NedStark@gmail.com');

-- Numbers 1..10000 generated on the fly via cross joins.
WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (N)),
N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
Numbers (Number) AS (SELECT ROW_NUMBER() OVER(ORDER BY N) FROM N3),
AllCharacters AS
(   -- One row per character of each email, together with its 1-based position.
    SELECT  t.ID,
            t.Email,
            Character = SUBSTRING(t.Email, n.Number, 1),
            Position = n.Number
    FROM    @T AS t
            INNER JOIN Numbers AS n
                -- <= (not <) so a special character in the last position is not missed.
                ON n.Number <= LEN(t.Email)
)
SELECT  ac.ID, ac.Character, ac.Position
FROM    AllCharacters AS ac
-- Force the character into a single-byte CHAR(1) under a case- and accent-sensitive
-- code page 1251 collation; a character that does not survive that round trip
-- unchanged compares unequal to the original and is reported as "special".
WHERE   CONVERT(CHAR(1), ac.Character) COLLATE SQL_Latin1_General_Cp1251_CS_AS <> ac.Character
-- Position is added to the sort so rows within one email have a deterministic order.
ORDER BY ac.ID, ac.Position;
结果
ID Email Character Position
----------------------------------------------------
1 JóhnSnów@gmail.com ó 2
1 JóhnSnów@gmail.com ó 7
2 Khãlessi@gmail.com ã 3
最后,如果需要,您可以使用 XML 扩展将这些字符连接到单个列中:
-- Sample data: two emails containing accented characters and one plain ASCII email.
DECLARE @T TABLE (ID INT IDENTITY, Email NVARCHAR(255));
INSERT @T (Email)
VALUES (N'JóhnSnów@gmail.com'), (N'Khãlessi@gmail.com'), (N'NedStark@gmail.com');

-- Numbers 1..10000 generated on the fly via cross joins.
WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (N)),
N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
Numbers (Number) AS (SELECT ROW_NUMBER() OVER(ORDER BY N) FROM N3),
AllCharacters AS
(   -- One row per character of each email, together with its 1-based position.
    SELECT  t.ID,
            t.Email,
            Character = SUBSTRING(t.Email, n.Number, 1),
            Position = n.Number
    FROM    @T AS t
            INNER JOIN Numbers AS n
                -- <= (not <) so a special character in the last position is not missed.
                ON n.Number <= LEN(t.Email)
), SpecialCharacters AS
(   -- Keep only characters that change when forced into a single-byte CHAR(1)
    -- under the case- and accent-sensitive code page 1251 collation.
    SELECT  ac.ID, ac.Character, ac.Position
    FROM    AllCharacters AS ac
    WHERE   CONVERT(CHAR(1), ac.Character) COLLATE SQL_Latin1_General_Cp1251_CS_AS <> ac.Character
)
-- One row per email; emails without special characters get an empty string.
SELECT  t.ID,
        t.Email,
        -- STUFF removes the leading ', ' separator. NVARCHAR(MAX) avoids silently
        -- truncating the list when an email contains many special characters.
        SpecialCharacters = ISNULL(STUFF(s.SpecialCharacterList.value('.', 'NVARCHAR(MAX)'), 1, 2, ''), '')
FROM    @T AS t   -- alias spelled exactly as it is referenced, so case-sensitive collations work
        CROSS APPLY
        (   -- Builds ', <char>(<position>)' fragments in position order; TYPE returns XML
            -- so that .value() above can read the text back without entity escaping.
            SELECT  CONCAT(N', ', sc.Character, '(', sc.Position, ')')
            FROM    SpecialCharacters AS sc
            WHERE   sc.ID = t.ID
            ORDER BY sc.Position
            FOR XML PATH(''), TYPE
        ) s (SpecialCharacterList)
ORDER BY t.ID;
结果
ID Email SpecialCharacters
------------------------------------------------
1 JóhnSnów@gmail.com ó(2), ó(7)
2 Khãlessi@gmail.com ã(3)
3 NedStark@gmail.com
顺便说一句,如果您要这样做,把您视为特殊字符的内容存储在一张表中可能更适合您的需要,而不是依赖于特定排序规则的代码页。这样您只需要更改此行:
WHERE CONVERT(CHAR(1), ac.Character) COLLATE SQL_Latin1_General_Cp1251_CS_AS <> ac.Character
改为:
-- MySpecialCharacterTable is a placeholder for a user-maintained lookup table holding one row
-- per character you treat as special; it is not defined in this document.
-- NOTE(review): the equality uses the collation of the compared columns; under an
-- accent- or case-insensitive collation 'o' could match 'ó' — confirm the lookup column
-- uses an accent-sensitive (or binary) collation.
WHERE EXISTS (SELECT 1 FROM MySpecialCharacterTable AS sct WHERE sct.Character = ac.Character)
如何在 SQL Server 的列中查找特殊字符?
我有电子邮件列表,我必须找到特殊的字符,例如下面的示例
**Email**
JóhnSnow@gmail.com
Khãlessi@gmail.com
As you can see above, '~' and '´' appear as special characters. Other characters, such as '..' or anything else, might appear as well.
我正在使用 SQL Server 2012,
有人有解决问题的建议吗?
要提取特殊字符,您首先需要将字符串按字符拆分成行,这样就可以单独检查每一个字符,这可以借助数字表(numbers table)来完成。如果您没有现成的数字表,也很容易即时创建:
-- Build an on-the-fly numbers (tally) table holding 1..10000 without reading any base table.
WITH Ten (N) AS
(   -- 10 seed rows; the value itself is irrelevant, only the row count matters
    SELECT v.N
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS v (N)
),
Hundred (N) AS
(   -- 10 x 10 = 100 rows
    SELECT 1
    FROM Ten AS a
    CROSS JOIN Ten AS b
),
TenThousand (N) AS
(   -- 100 x 100 = 10,000 rows
    SELECT 1
    FROM Hundred AS a
    CROSS JOIN Hundred AS b
),
Numbers (Number) AS
(   -- Every N is 1, so the ordering is arbitrary, but the numbering always yields 1..10000
    SELECT ROW_NUMBER() OVER (ORDER BY N)
    FROM TenThousand
)
SELECT Number
FROM Numbers;
这会生成 1 到 10000 的数字列表(更多相关信息见原答案中的链接)。
然后您可以使用条件 Number <= LEN(Email) 将其联接到您的数据,以确保为电子邮件中的每个字符(包括最后一个字符)返回一行,然后使用 SUBSTRING() 提取位置 n 处的字符:
-- Sample data: two emails containing accented characters and one plain ASCII email.
DECLARE @T TABLE (ID INT IDENTITY, Email NVARCHAR(255));
INSERT @T (Email)
VALUES (N'JóhnSnów@gmail.com'), (N'Khãlessi@gmail.com'), (N'NedStark@gmail.com');

-- Numbers 1..10000 generated on the fly (10 x 10 x 100 x 100 via cross joins);
-- 10000 comfortably covers the NVARCHAR(255) column.
WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (N)),
N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
Numbers (Number) AS (SELECT ROW_NUMBER() OVER(ORDER BY N) FROM N3)
-- One output row per character of each email.
SELECT  t.ID,
        t.Email,
        Character = SUBSTRING(t.Email, n.Number, 1)
FROM    @T AS t
        INNER JOIN Numbers AS n
            -- <= (not <) so the final character is returned too.
            -- LEN ignores trailing spaces, so trailing blanks are not split out.
            ON n.Number <= LEN(t.Email)
-- Sort by position as well so the characters come back in reading order.
ORDER BY t.ID, n.Number;
给出:
ID Email Character
-----------------------------
1 JóhnSnów@gmail.com J
1 JóhnSnów@gmail.com ó
1 JóhnSnów@gmail.com h
1 JóhnSnów@gmail.com n
1 JóhnSnów@gmail.com S
1 JóhnSnów@gmail.com n
1 JóhnSnów@gmail.com ó
1 JóhnSnów@gmail.com w
.....
然后您可以提取特殊字符,方法是使用排序规则 SQL_Latin1_General_Cp1251_CS_AS 将每个字符转换为非 Unicode 类型(CHAR(1)),并检查转换后的结果是否仍与原始字符相同:
-- Sample data: two emails containing accented characters and one plain ASCII email.
DECLARE @T TABLE (ID INT IDENTITY, Email NVARCHAR(255));
INSERT @T (Email)
VALUES (N'JóhnSnów@gmail.com'), (N'Khãlessi@gmail.com'), (N'NedStark@gmail.com');

-- Numbers 1..10000 generated on the fly via cross joins.
WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (N)),
N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
Numbers (Number) AS (SELECT ROW_NUMBER() OVER(ORDER BY N) FROM N3),
AllCharacters AS
(   -- One row per character of each email, together with its 1-based position.
    SELECT  t.ID,
            t.Email,
            Character = SUBSTRING(t.Email, n.Number, 1),
            Position = n.Number
    FROM    @T AS t
            INNER JOIN Numbers AS n
                -- <= (not <) so a special character in the last position is not missed.
                ON n.Number <= LEN(t.Email)
)
SELECT  ac.ID, ac.Character, ac.Position
FROM    AllCharacters AS ac
-- Force the character into a single-byte CHAR(1) under a case- and accent-sensitive
-- code page 1251 collation; a character that does not survive that round trip
-- unchanged compares unequal to the original and is reported as "special".
WHERE   CONVERT(CHAR(1), ac.Character) COLLATE SQL_Latin1_General_Cp1251_CS_AS <> ac.Character
-- Position is added to the sort so rows within one email have a deterministic order.
ORDER BY ac.ID, ac.Position;
结果
ID Email Character Position
----------------------------------------------------
1 JóhnSnów@gmail.com ó 2
1 JóhnSnów@gmail.com ó 7
2 Khãlessi@gmail.com ã 3
最后,如果需要,您可以使用 XML 扩展将这些字符连接到单个列中:
-- Sample data: two emails containing accented characters and one plain ASCII email.
DECLARE @T TABLE (ID INT IDENTITY, Email NVARCHAR(255));
INSERT @T (Email)
VALUES (N'JóhnSnów@gmail.com'), (N'Khãlessi@gmail.com'), (N'NedStark@gmail.com');

-- Numbers 1..10000 generated on the fly via cross joins.
WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (N)),
N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
Numbers (Number) AS (SELECT ROW_NUMBER() OVER(ORDER BY N) FROM N3),
AllCharacters AS
(   -- One row per character of each email, together with its 1-based position.
    SELECT  t.ID,
            t.Email,
            Character = SUBSTRING(t.Email, n.Number, 1),
            Position = n.Number
    FROM    @T AS t
            INNER JOIN Numbers AS n
                -- <= (not <) so a special character in the last position is not missed.
                ON n.Number <= LEN(t.Email)
), SpecialCharacters AS
(   -- Keep only characters that change when forced into a single-byte CHAR(1)
    -- under the case- and accent-sensitive code page 1251 collation.
    SELECT  ac.ID, ac.Character, ac.Position
    FROM    AllCharacters AS ac
    WHERE   CONVERT(CHAR(1), ac.Character) COLLATE SQL_Latin1_General_Cp1251_CS_AS <> ac.Character
)
-- One row per email; emails without special characters get an empty string.
SELECT  t.ID,
        t.Email,
        -- STUFF removes the leading ', ' separator. NVARCHAR(MAX) avoids silently
        -- truncating the list when an email contains many special characters.
        SpecialCharacters = ISNULL(STUFF(s.SpecialCharacterList.value('.', 'NVARCHAR(MAX)'), 1, 2, ''), '')
FROM    @T AS t   -- alias spelled exactly as it is referenced, so case-sensitive collations work
        CROSS APPLY
        (   -- Builds ', <char>(<position>)' fragments in position order; TYPE returns XML
            -- so that .value() above can read the text back without entity escaping.
            SELECT  CONCAT(N', ', sc.Character, '(', sc.Position, ')')
            FROM    SpecialCharacters AS sc
            WHERE   sc.ID = t.ID
            ORDER BY sc.Position
            FOR XML PATH(''), TYPE
        ) s (SpecialCharacterList)
ORDER BY t.ID;
结果
ID Email SpecialCharacters
------------------------------------------------
1 JóhnSnów@gmail.com ó(2), ó(7)
2 Khãlessi@gmail.com ã(3)
3 NedStark@gmail.com
顺便说一句,如果您要这样做,把您视为特殊字符的内容存储在一张表中可能更适合您的需要,而不是依赖于特定排序规则的代码页。这样您只需要更改此行:
WHERE CONVERT(CHAR(1), ac.Character) COLLATE SQL_Latin1_General_Cp1251_CS_AS <> ac.Character
改为:
-- MySpecialCharacterTable is a placeholder for a user-maintained lookup table holding one row
-- per character you treat as special; it is not defined in this document.
-- NOTE(review): the equality uses the collation of the compared columns; under an
-- accent- or case-insensitive collation 'o' could match 'ó' — confirm the lookup column
-- uses an accent-sensitive (or binary) collation.
WHERE EXISTS (SELECT 1 FROM MySpecialCharacterTable AS sct WHERE sct.Character = ac.Character)