T-SQL 函数提取日期
T-SQL function to extract dates
我创建了一个能够从字符串中提取日期的 T-SQL 函数。
当日期中不含句点(.)时,该函数可以正常工作。
请帮助找到缺失的部分,使此函数适用于带句点的日期。
带句点的日期有两种不同的风格:
MM.DD.YYYY
M.DD.YY
我的函数代码:
-- Demo fixture: rebuild the temp table of sample file names, then run the
-- scalar extractor over every row.
IF OBJECT_ID(N'tempdb..#fileNameTable', N'U') IS NOT NULL
    DROP TABLE #fileNameTable;

CREATE TABLE #fileNameTable
(
    [fName] [VARCHAR](250) NULL
);

-- Samples contain dotted dates (e.g. 08.02.2018, 6.26.17) and
-- eight-digit dates (e.g. 20200817), each next to a ten-digit number.
INSERT INTO #fileNameTable (fName)
VALUES
    ('9999999991_Agent Name_08.02.2018.WAV'),
    ('9999999999 - Internal ID 1446683 (Pedro) 6.26.17 WB.mp3'),
    ('9999999998 - Internal ID 1464807 (John) 7.11.17.mp3'),
    ('9999999997 - Internal ID 1447503 (Marta) 6.27.17.mp3'),
    ('9999999996 - Internal ID 1437403 (Ruby) 6.20.17.mp3'),
    ('rc_20200817_1612_9999999995_NJ.wav'),
    ('rc_20200817_1543_9999999994_PA.wav'),
    ('rc_20200817_1211_9999999993_MA.wav'),
    ('rc_20200817_1211_9999999992_MD.wav');

-- One output row per file name, with the extractor's result beside it.
SELECT t.*, [dbo].[ExtractDateFromFileName](t.fName)
FROM #fileNameTable AS t;
-- Returns the first stand-alone run of exactly eight digits in @str
-- (for example '20200817' from 'rc_20200817_1612_9999999995_NJ.wav').
-- Returns '' when @str is NULL or contains no such run.
--
-- "Stand-alone" means the run is neither preceded nor followed by another
-- digit, so the leading eight digits of a longer number (such as a ten-digit
-- ID) are not mistaken for a date. Dotted dates such as 08.02.2018 contain no
-- eight-digit run and therefore also yield ''.
--
-- @str    : text to search (a file name in the accompanying demo).
-- Returns : the eight digits as VARCHAR; the value is not validated as a
--           calendar date.
CREATE FUNCTION [dbo].[ExtractDateFromFileName]
(@str VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
    DECLARE @eightDigits VARCHAR(64) = '[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]';

    -- Pad both ends with a non-digit so a run at the very start or end of
    -- @str still has a non-digit neighbour to match against. The match
    -- position points at the non-digit before the run in the padded string,
    -- which is exactly the position of the run's first digit in @str.
    DECLARE @idx INT = PATINDEX('%[^0-9]' + @eightDigits + '[^0-9]%', '.' + @str + '.');

    -- A NULL @str makes @idx NULL, which falls through to the ELSE branch.
    IF @idx > 0
        SET @str = SUBSTRING(@str, @idx, 8);
    ELSE
        SET @str = '';

    RETURN @str;
END
我建议您将其实现为表值(table-valued)函数。我尝试重写了您的函数,使其适用于示例数据中的两种日期格式。它可能仍需针对一些边界情况(edge cases)做调整,但应该已经基本可用。
我在台式电脑上用 130 万行数据(由您的示例数据通过笛卡尔连接复制而成)进行了测试,您当前的标量值(scalar-valued)函数的 statistics time 结果如下:
SQL Server Execution Times: CPU time = 19886 ms, elapsed time = 19961 ms.
对同样的 130 万行数据使用下面的函数,耗时如下:
SQL Server Execution Times: CPU time = 7122 ms, elapsed time = 7296 ms.
表值函数要快得多,因为 SQL Server 能够将它内联到查询中,而不是针对每一行单独执行一次独立的函数调用(即 RBAR,逐行处理)。
-- Inline table-valued function returning one row with one DATE column,
-- [Value]: the first of the candidates below that converts to a valid date,
-- or NULL when none does.
--
-- Candidates, in priority order:
--   1. eight consecutive digits starting with 1-3, read as yyyymmdd (style 112)
--   2. NN.NN.NNNN, read as MM.DD.YYYY (style 101)
--   3. N.NN.NN,    read as M.DD.YY   (style 1)
--
-- Dots are replaced by slashes before conversion so the text matches the
-- separator of styles 101 and 1. A pattern that is not found yields NULL
-- (via NULLIF) instead of position 0; otherwise Substring would return the
-- start of the string and unrelated leading text could be converted.
--
-- NOTE(review): pattern 3 has a single month digit, so a two-digit month
-- with a two-digit year (e.g. 12.26.17) is matched from its second digit.
create or alter function [dbo].[ExtractDateFromFileName2](@str varchar(256))
returns table
as return
select coalesce(
           Try_Convert(date, Substring(string, d1.v, 8), 112),
           Try_Convert(date, Replace(Substring(string, d3.v, 10), '.', '/'), 101),
           Try_Convert(date, Replace(Substring(string, d2.v, 7), '.', '/'), 1)
       ) [Value]
from (select @str string) s
outer apply (values (NullIf(PatIndex('%[1-3][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%', string), 0))) d1(v)
outer apply (values (NullIf(PatIndex('%[0-9].[0-9][0-9].[0-9][0-9]%', string), 0))) d2(v)
outer apply (values (NullIf(PatIndex('%[0-9][0-9].[0-9][0-9].[0-9][0-9][0-9][0-9]%', string), 0))) d3(v);
另一种方法:
-- Inline table-valued function returning one row with one DATE column,
-- theDate: the date found in @garbage, or NULL when nothing converts.
--
-- Two shapes are searched for:
--   s: a dotted date whose first part is two characters from [01 ][0-9]
--      (so ' 6.26.17', '08.02.2018' and '12.5.17' all match)
--   t: eight digits shaped like 2yyymmdd
-- The dotted match wins when both are present. The candidate text is the
-- six characters starting at the match plus the digits that follow them.
--
-- Dotted candidates are always read month-first: dots become slashes and an
-- explicit style is passed (101 for a four-digit year, 1 otherwise), so the
-- result does not depend on the session's DATEFORMAT or LANGUAGE. Compact
-- candidates are read as yyyymmdd (style 112).
-- When neither pattern matches, the start position becomes NULL so that no
-- text at all is handed to the conversion.
CREATE FUNCTION dbo.ExtractDateFromGarbage ( @garbage varchar(250))
RETURNS TABLE WITH SCHEMABINDING
AS
RETURN
(
    SELECT theDate = CASE
               WHEN s = 0
                   THEN TRY_CONVERT(date, txt, 112)
               WHEN txt LIKE '%.[0-9][0-9][0-9][0-9]'
                   THEN TRY_CONVERT(date, REPLACE(txt, '.', '/'), 101)
               ELSE TRY_CONVERT(date, REPLACE(txt, '.', '/'), 1)
           END
    FROM (SELECT g = @garbage) AS f
    CROSS APPLY (VALUES (PATINDEX('%[01 ][0-9].%[0-9].[0-9][0-9]%', g),
                         PATINDEX('%2[0-9][0-9][0-9][0-1][0-9][0-3][0-9]%', g))) AS v(s,t)
    CROSS APPLY (VALUES (SUBSTRING(g, NULLIF(COALESCE(NULLIF(s,0),t), 0), 250))) AS q(q)
    CROSS APPLY (VALUES (LEFT(q,6), SUBSTRING(q,7,250))) AS z(x,y)
    -- y + '.' guarantees a non-digit exists, so the LEFT length is never -1.
    CROSS APPLY (VALUES (LTRIM(x + LEFT(y, PATINDEX('%[^0-9]%', y+'.')-1)))) AS cand(txt)
);
它还能处理 Stu 的函数未处理的一种边界情况(mm.d.yy):
归根结底,这类处理仍然应该在别处完成(并且在数据进入 SQL Server 时,就以正确的数据类型存储正确的日期)。
我创建了一个能够从字符串中提取日期的 T-SQL 函数。
当日期中不含句点(.)时,该函数可以正常工作。
请帮助找到缺失的部分,使此函数适用于带句点的日期。
带句点的日期有两种不同的风格:
MM.DD.YYYY
M.DD.YY
我的函数代码:
-- Demo fixture: rebuild the temp table of sample file names, then run the
-- scalar extractor over every row.
IF OBJECT_ID(N'tempdb..#fileNameTable', N'U') IS NOT NULL
    DROP TABLE #fileNameTable;

CREATE TABLE #fileNameTable
(
    [fName] [VARCHAR](250) NULL
);

-- Samples contain dotted dates (e.g. 08.02.2018, 6.26.17) and
-- eight-digit dates (e.g. 20200817), each next to a ten-digit number.
INSERT INTO #fileNameTable (fName)
VALUES
    ('9999999991_Agent Name_08.02.2018.WAV'),
    ('9999999999 - Internal ID 1446683 (Pedro) 6.26.17 WB.mp3'),
    ('9999999998 - Internal ID 1464807 (John) 7.11.17.mp3'),
    ('9999999997 - Internal ID 1447503 (Marta) 6.27.17.mp3'),
    ('9999999996 - Internal ID 1437403 (Ruby) 6.20.17.mp3'),
    ('rc_20200817_1612_9999999995_NJ.wav'),
    ('rc_20200817_1543_9999999994_PA.wav'),
    ('rc_20200817_1211_9999999993_MA.wav'),
    ('rc_20200817_1211_9999999992_MD.wav');

-- One output row per file name, with the extractor's result beside it.
SELECT t.*, [dbo].[ExtractDateFromFileName](t.fName)
FROM #fileNameTable AS t;
-- Returns the first stand-alone run of exactly eight digits in @str
-- (for example '20200817' from 'rc_20200817_1612_9999999995_NJ.wav').
-- Returns '' when @str is NULL or contains no such run.
--
-- "Stand-alone" means the run is neither preceded nor followed by another
-- digit, so the leading eight digits of a longer number (such as a ten-digit
-- ID) are not mistaken for a date. Dotted dates such as 08.02.2018 contain no
-- eight-digit run and therefore also yield ''.
--
-- @str    : text to search (a file name in the accompanying demo).
-- Returns : the eight digits as VARCHAR; the value is not validated as a
--           calendar date.
CREATE FUNCTION [dbo].[ExtractDateFromFileName]
(@str VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
    DECLARE @eightDigits VARCHAR(64) = '[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]';

    -- Pad both ends with a non-digit so a run at the very start or end of
    -- @str still has a non-digit neighbour to match against. The match
    -- position points at the non-digit before the run in the padded string,
    -- which is exactly the position of the run's first digit in @str.
    DECLARE @idx INT = PATINDEX('%[^0-9]' + @eightDigits + '[^0-9]%', '.' + @str + '.');

    -- A NULL @str makes @idx NULL, which falls through to the ELSE branch.
    IF @idx > 0
        SET @str = SUBSTRING(@str, @idx, 8);
    ELSE
        SET @str = '';

    RETURN @str;
END
我建议您将其实现为表值(table-valued)函数。我尝试重写了您的函数,使其适用于示例数据中的两种日期格式。它可能仍需针对一些边界情况(edge cases)做调整,但应该已经基本可用。
我在台式电脑上用 130 万行数据(由您的示例数据通过笛卡尔连接复制而成)进行了测试,您当前的标量值(scalar-valued)函数的 statistics time 结果如下:
SQL Server Execution Times: CPU time = 19886 ms, elapsed time = 19961 ms.
对同样的 130 万行数据使用下面的函数,耗时如下:
SQL Server Execution Times: CPU time = 7122 ms, elapsed time = 7296 ms.
表值函数要快得多,因为 SQL Server 能够将它内联到查询中,而不是针对每一行单独执行一次独立的函数调用(即 RBAR,逐行处理)。
-- Inline table-valued function returning one row with one DATE column,
-- [Value]: the first of the candidates below that converts to a valid date,
-- or NULL when none does.
--
-- Candidates, in priority order:
--   1. eight consecutive digits starting with 1-3, read as yyyymmdd (style 112)
--   2. NN.NN.NNNN, read as MM.DD.YYYY (style 101)
--   3. N.NN.NN,    read as M.DD.YY   (style 1)
--
-- Dots are replaced by slashes before conversion so the text matches the
-- separator of styles 101 and 1. A pattern that is not found yields NULL
-- (via NULLIF) instead of position 0; otherwise Substring would return the
-- start of the string and unrelated leading text could be converted.
--
-- NOTE(review): pattern 3 has a single month digit, so a two-digit month
-- with a two-digit year (e.g. 12.26.17) is matched from its second digit.
create or alter function [dbo].[ExtractDateFromFileName2](@str varchar(256))
returns table
as return
select coalesce(
           Try_Convert(date, Substring(string, d1.v, 8), 112),
           Try_Convert(date, Replace(Substring(string, d3.v, 10), '.', '/'), 101),
           Try_Convert(date, Replace(Substring(string, d2.v, 7), '.', '/'), 1)
       ) [Value]
from (select @str string) s
outer apply (values (NullIf(PatIndex('%[1-3][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%', string), 0))) d1(v)
outer apply (values (NullIf(PatIndex('%[0-9].[0-9][0-9].[0-9][0-9]%', string), 0))) d2(v)
outer apply (values (NullIf(PatIndex('%[0-9][0-9].[0-9][0-9].[0-9][0-9][0-9][0-9]%', string), 0))) d3(v);
另一种方法:
-- Inline table-valued function returning one row with one DATE column,
-- theDate: the date found in @garbage, or NULL when nothing converts.
--
-- Two shapes are searched for:
--   s: a dotted date whose first part is two characters from [01 ][0-9]
--      (so ' 6.26.17', '08.02.2018' and '12.5.17' all match)
--   t: eight digits shaped like 2yyymmdd
-- The dotted match wins when both are present. The candidate text is the
-- six characters starting at the match plus the digits that follow them.
--
-- Dotted candidates are always read month-first: dots become slashes and an
-- explicit style is passed (101 for a four-digit year, 1 otherwise), so the
-- result does not depend on the session's DATEFORMAT or LANGUAGE. Compact
-- candidates are read as yyyymmdd (style 112).
-- When neither pattern matches, the start position becomes NULL so that no
-- text at all is handed to the conversion.
CREATE FUNCTION dbo.ExtractDateFromGarbage ( @garbage varchar(250))
RETURNS TABLE WITH SCHEMABINDING
AS
RETURN
(
    SELECT theDate = CASE
               WHEN s = 0
                   THEN TRY_CONVERT(date, txt, 112)
               WHEN txt LIKE '%.[0-9][0-9][0-9][0-9]'
                   THEN TRY_CONVERT(date, REPLACE(txt, '.', '/'), 101)
               ELSE TRY_CONVERT(date, REPLACE(txt, '.', '/'), 1)
           END
    FROM (SELECT g = @garbage) AS f
    CROSS APPLY (VALUES (PATINDEX('%[01 ][0-9].%[0-9].[0-9][0-9]%', g),
                         PATINDEX('%2[0-9][0-9][0-9][0-1][0-9][0-3][0-9]%', g))) AS v(s,t)
    CROSS APPLY (VALUES (SUBSTRING(g, NULLIF(COALESCE(NULLIF(s,0),t), 0), 250))) AS q(q)
    CROSS APPLY (VALUES (LEFT(q,6), SUBSTRING(q,7,250))) AS z(x,y)
    -- y + '.' guarantees a non-digit exists, so the LEFT length is never -1.
    CROSS APPLY (VALUES (LTRIM(x + LEFT(y, PATINDEX('%[^0-9]%', y+'.')-1)))) AS cand(txt)
);
它还能处理 Stu 的函数未处理的一种边界情况(mm.d.yy):
归根结底,这类处理仍然应该在别处完成(并且在数据进入 SQL Server 时,就以正确的数据类型存储正确的日期)。