SQL 在导入期间拆分行 - 对特定行运行两次 INSERT INTO
SQL Split line during import - run INSERT INTO twice for specific lines
让我们假设一个超级简化的数据示例:
ID AKey AVal
-----------------
1 AB 94
2 Q 48
3 Z 56
4 AB 12
5 T 77
... ... ...
我想在我的导入脚本中将 "AB" 分成单独的行 "A" 和 "B",我通常这样做:
-- Baseline import: copy every row of OldTable into MyNewTable unchanged,
-- exposing the source columns as OldIDRef / NewKey / NewVal.
INSERT INTO MyNewTable
SELECT
    old_row.ID   AS OldIDRef,
    old_row.AKey AS NewKey,
    old_row.AVal AS NewVal
FROM OldTable AS old_row
所以基本上,我想复制 select 中的 "AB" 行并执行一些特定于该行的计算(假设将 AVal 除以 2)
我能想到的唯一解决办法是这样的:
-- Import OldTable into MyNewTable, splitting every 'AB' row into an 'A' row
-- and a 'B' row; all other rows are copied through unchanged.
INSERT INTO MyNewTable
-- Branch 1: every source row. An 'AB' row becomes its 'A' half (AVal / 2).
-- The ELSE branches keep the original key/value for rows that are not split;
-- without them those rows would be imported as NULL / NULL.
SELECT
    SRC.ID AS OldIDRef,
    CASE WHEN SRC.AKey = 'AB' THEN SUBSTRING(SRC.AKey, 1, 1) ELSE SRC.AKey END AS NewKey,
    CASE WHEN SRC.AKey = 'AB' THEN SRC.AVal / 2 ELSE SRC.AVal END AS NewVal
FROM OldTable AS SRC
UNION ALL
-- Branch 2: only the 'AB' rows, emitted a second time as their 'B' half.
SELECT
    SRC.ID AS OldIDRef,
    SUBSTRING(SRC.AKey, 2, 1) AS NewKey,
    SRC.AVal / 4 + 10 AS NewVal
FROM OldTable AS SRC
WHERE SRC.AKey = 'AB'
在我的导入中我会多次需要这样的处理,所以我想知道,我是否错过了某种更简单的解决方案?
无论如何,这是脚本,它适用于给定的值并且应该比 UNION ALL 快两倍:
-- Single-pass alternative to the UNION ALL import: every row of Split is
-- joined to a three-row helper set ('A', 'B', NULL). An 'AB' row pairs with
-- the 'A' and 'B' helper rows (two output rows); any other row pairs only
-- with the NULL helper row and is passed through unchanged.
;WITH s1 AS (
    -- Pre-compute both halves of a potential split next to the original values.
    SELECT ID, LEFT(AKey,1) AS NewKey1, AVal / 2 AS NewVal1
         , RIGHT(AKey,1) AS NewKey2, AVal / 4 + 10 AS NewVal2
         , AKey, AVal
    FROM Split)
SELECT ID AS OldIDRef,
    CASE NKey.AKey WHEN 'A' THEN NewKey1 WHEN 'B' THEN NewKey2 ELSE s1.AKey END AS NewKey,
    CASE NKey.AKey WHEN 'A' THEN NewVal1 WHEN 'B' THEN NewVal2 ELSE s1.AVal END AS NewVal
FROM s1
INNER JOIN (SELECT 'A' AS AKey UNION ALL SELECT 'B' UNION ALL SELECT NULL) AS NKey
    -- Decide on the whole key, not on its first/last character separately:
    -- matching the characters one by one would also hit keys such as 'A',
    -- 'B', 'AX' or 'XB' and emit both a bogus split row and the pass-through
    -- row for them. NULL keys are passed through like any other unsplit row.
    ON (s1.AKey = 'AB' AND NKey.AKey IS NOT NULL)
    OR (NKey.AKey IS NULL AND (s1.AKey <> 'AB' OR s1.AKey IS NULL))
假设您正在寻找一个可以扩展到任意键长度(N 个字符)、并且可以按拆分出的键的数量来分配新值的答案,我会使用递归 CTE 来完成。在您的示例数据中再添加一行包含 3 个字符的键(例如 'ghi'),然后运行此代码,就可以看到结果能够扩展到 2 个以上的字符。
-- Split each AKey in @Table into one output row per character.
-- The anchor member takes the first character of every key; the recursive
-- member peels the next character off RemainingKey until nothing is left.
;WITH cteRecursive AS (
    SELECT
        Id
        ,AKey
        ,LEFT(AKey, 1) AS NewAKey
        -- SUBSTRING(x, 2, LEN(x)) = everything after the first character.
        -- Unlike RIGHT(x, LEN(x) - 1) it does not raise an error for an empty
        -- key (negative length) and is not thrown off by trailing spaces,
        -- which LEN ignores but RIGHT counts.
        ,SUBSTRING(AKey, 2, LEN(AKey)) AS RemainingKey
        ,AVal
        ,1 AS [Level]
    FROM
        @Table
    UNION ALL
    SELECT
        t.Id
        ,t.AKey
        ,LEFT(c.RemainingKey, 1) AS NewAKey
        ,SUBSTRING(c.RemainingKey, 2, LEN(c.RemainingKey)) AS RemainingKey
        ,t.AVal
        ,c.[Level] + 1 AS [Level]
    FROM
        @Table t
        INNER JOIN cteRecursive c
            ON t.Id = c.Id
            -- Stop once the previous level consumed the last character.
            AND LEN(c.RemainingKey) > 0
)
SELECT
    Id
    ,AKey AS OriginalAKey
    ,NewAKey
    ,AVal AS OriginalAVal
    ,AVal / 2.00 AS NewVal
    -- MAX([Level]) per Id is the number of rows the key was split into.
    ,AVal / CAST(MAX([Level]) OVER (PARTITION BY Id) AS DECIMAL(4,2)) AS NewValAsPortionOfLevel
    -- NULLIF turns a zero key length into NULL so an empty key yields NULL
    -- instead of aborting the query with a divide-by-zero error.
    ,AVal / CAST(NULLIF(LEN(AKey), 0) AS DECIMAL(4,2)) AS NewValAsPortionOfKeyLength
FROM
    cteRecursive
这是我用的 Table 变量,如果你需要的话
-- Sample data for the queries in this answer: a table variable with an
-- auto-numbered Id, a key of one or more characters, and an integer value.
DECLARE @Table AS TABLE (
    Id   INT IDENTITY(1,1),
    AKey VARCHAR(100),
    AVal INT
)
INSERT INTO @Table (AKey, AVal)
VALUES
    ('AB', 94),
    ('Q', 48),
    ('Z', 56),
    ('AB', 12),
    ('T', 77),
    ('ghi', 100)
如果不需要拆分键本身,您实际上可以简化递归 CTE。通过使用 Level < LEN(AKey) 作为递归条件,
递归会在正确的位置停止,您不需要任何其他字符串操作。
-- Emit one row per character of each AKey in @Table without cutting the key
-- apart: [Level] simply counts 1..LEN(AKey) for every Id.
;WITH cteRecursive AS (
    -- Anchor member: one row per source row, at level 1.
    SELECT
        Id
        ,AKey
        ,AVal
        ,1 AS [Level]
    FROM
        @Table
    UNION ALL
    -- Recursive member: add the next level while it is still below the key length.
    SELECT
        t.Id
        ,t.AKey
        ,t.AVal
        ,c.[Level] + 1 AS [Level]
    FROM
        @Table t
        INNER JOIN cteRecursive c
            ON t.Id = c.Id
            AND c.[Level] < LEN(t.AKey)
)
SELECT
    Id
    ,AKey AS OriginalAKey
    ,AVal AS OriginalAVal
    ,AVal / 2.00 AS NewVal
    -- MAX([Level]) per Id is the number of rows produced for that key.
    ,AVal / CAST(MAX([Level]) OVER (PARTITION BY Id) AS DECIMAL(4,2)) AS NewValAsPortionOfLevel
    -- NULLIF turns a zero key length into NULL so an empty key yields NULL
    -- instead of aborting the query with a divide-by-zero error.
    ,AVal / CAST(NULLIF(LEN(AKey), 0) AS DECIMAL(4,2)) AS NewValAsPortionOfKeyLength
FROM
    cteRecursive
如果您的数据集非常大并且不想使用递归,还有另一种技术:构建一个计数表(Tally Table)来进行连接。我很想知道哪种方法性能更好。实际上,我在所用数据仓库的 ETL 中就有一个永久的计数表,用于某些记录操作;不过在这里您可能应该使用临时表而不是表变量。不管怎样,方法如下。
-- Tally-table alternative to the recursive CTE: store the numbers
-- 1..MAX(LEN(AKey)) once, then join so a key of length N matches N tally rows.
DECLARE @TallyTable AS TABLE (I INT)
DECLARE @MaxLen INT

SELECT @MaxLen = MAX(LEN(AKey)) FROM @Table

-- Count down from the longest key length to 1, storing each number.
-- A NULL or non-positive @MaxLen never enters the loop, so no separate
-- IF guard is required around it.
WHILE @MaxLen > 0
BEGIN
    INSERT INTO @TallyTable (I) VALUES (@MaxLen)
    SET @MaxLen = @MaxLen - 1
END

SELECT
    src.Id
    ,src.AKey
    ,src.AVal
    ,num.I
    -- Value divided by the key length; a zero/NULL length falls back to 1.
    ,CAST(src.AVal AS DECIMAL) / ISNULL(NULLIF(LEN(src.AKey),0),1) AS NewValueApportionedByLengthOfKey
FROM
    @Table src
    INNER JOIN @TallyTable num
        ON num.I <= LEN(src.AKey)
请注意,所有这些方法都假定 AKey 永远不会为 NULL 或长度为 0,但如果需要,所有这些方法都可以很容易地适应处理。
让我们假设一个超级简化的数据示例:
ID AKey AVal
-----------------
1 AB 94
2 Q 48
3 Z 56
4 AB 12
5 T 77
... ... ...
我想在我的导入脚本中将 "AB" 分成单独的行 "A" 和 "B",我通常这样做:
-- Baseline import: copy every row of OldTable into MyNewTable unchanged,
-- exposing the source columns as OldIDRef / NewKey / NewVal.
INSERT INTO MyNewTable
SELECT
    old_row.ID   AS OldIDRef,
    old_row.AKey AS NewKey,
    old_row.AVal AS NewVal
FROM OldTable AS old_row
所以基本上,我想复制 select 中的 "AB" 行并执行一些特定于该行的计算(假设将 AVal 除以 2)
我能想到的唯一解决办法是这样的:
-- Import OldTable into MyNewTable, splitting every 'AB' row into an 'A' row
-- and a 'B' row; all other rows are copied through unchanged.
INSERT INTO MyNewTable
-- Branch 1: every source row. An 'AB' row becomes its 'A' half (AVal / 2).
-- The ELSE branches keep the original key/value for rows that are not split;
-- without them those rows would be imported as NULL / NULL.
SELECT
    SRC.ID AS OldIDRef,
    CASE WHEN SRC.AKey = 'AB' THEN SUBSTRING(SRC.AKey, 1, 1) ELSE SRC.AKey END AS NewKey,
    CASE WHEN SRC.AKey = 'AB' THEN SRC.AVal / 2 ELSE SRC.AVal END AS NewVal
FROM OldTable AS SRC
UNION ALL
-- Branch 2: only the 'AB' rows, emitted a second time as their 'B' half.
SELECT
    SRC.ID AS OldIDRef,
    SUBSTRING(SRC.AKey, 2, 1) AS NewKey,
    SRC.AVal / 4 + 10 AS NewVal
FROM OldTable AS SRC
WHERE SRC.AKey = 'AB'
在我的导入中我会多次需要这样的处理,所以我想知道,我是否错过了某种更简单的解决方案?
无论如何,这是脚本,它适用于给定的值并且应该比 UNION ALL 快两倍:
-- Single-pass alternative to the UNION ALL import: every row of Split is
-- joined to a three-row helper set ('A', 'B', NULL). An 'AB' row pairs with
-- the 'A' and 'B' helper rows (two output rows); any other row pairs only
-- with the NULL helper row and is passed through unchanged.
;WITH s1 AS (
    -- Pre-compute both halves of a potential split next to the original values.
    SELECT ID, LEFT(AKey,1) AS NewKey1, AVal / 2 AS NewVal1
         , RIGHT(AKey,1) AS NewKey2, AVal / 4 + 10 AS NewVal2
         , AKey, AVal
    FROM Split)
SELECT ID AS OldIDRef,
    CASE NKey.AKey WHEN 'A' THEN NewKey1 WHEN 'B' THEN NewKey2 ELSE s1.AKey END AS NewKey,
    CASE NKey.AKey WHEN 'A' THEN NewVal1 WHEN 'B' THEN NewVal2 ELSE s1.AVal END AS NewVal
FROM s1
INNER JOIN (SELECT 'A' AS AKey UNION ALL SELECT 'B' UNION ALL SELECT NULL) AS NKey
    -- Decide on the whole key, not on its first/last character separately:
    -- matching the characters one by one would also hit keys such as 'A',
    -- 'B', 'AX' or 'XB' and emit both a bogus split row and the pass-through
    -- row for them. NULL keys are passed through like any other unsplit row.
    ON (s1.AKey = 'AB' AND NKey.AKey IS NOT NULL)
    OR (NKey.AKey IS NULL AND (s1.AKey <> 'AB' OR s1.AKey IS NULL))
假设您正在寻找一个可以扩展到任意键长度(N 个字符)、并且可以按拆分出的键的数量来分配新值的答案,我会使用递归 CTE 来完成。在您的示例数据中再添加一行包含 3 个字符的键(例如 'ghi'),然后运行此代码,就可以看到结果能够扩展到 2 个以上的字符。
-- Split each AKey in @Table into one output row per character.
-- The anchor member takes the first character of every key; the recursive
-- member peels the next character off RemainingKey until nothing is left.
;WITH cteRecursive AS (
    SELECT
        Id
        ,AKey
        ,LEFT(AKey, 1) AS NewAKey
        -- SUBSTRING(x, 2, LEN(x)) = everything after the first character.
        -- Unlike RIGHT(x, LEN(x) - 1) it does not raise an error for an empty
        -- key (negative length) and is not thrown off by trailing spaces,
        -- which LEN ignores but RIGHT counts.
        ,SUBSTRING(AKey, 2, LEN(AKey)) AS RemainingKey
        ,AVal
        ,1 AS [Level]
    FROM
        @Table
    UNION ALL
    SELECT
        t.Id
        ,t.AKey
        ,LEFT(c.RemainingKey, 1) AS NewAKey
        ,SUBSTRING(c.RemainingKey, 2, LEN(c.RemainingKey)) AS RemainingKey
        ,t.AVal
        ,c.[Level] + 1 AS [Level]
    FROM
        @Table t
        INNER JOIN cteRecursive c
            ON t.Id = c.Id
            -- Stop once the previous level consumed the last character.
            AND LEN(c.RemainingKey) > 0
)
SELECT
    Id
    ,AKey AS OriginalAKey
    ,NewAKey
    ,AVal AS OriginalAVal
    ,AVal / 2.00 AS NewVal
    -- MAX([Level]) per Id is the number of rows the key was split into.
    ,AVal / CAST(MAX([Level]) OVER (PARTITION BY Id) AS DECIMAL(4,2)) AS NewValAsPortionOfLevel
    -- NULLIF turns a zero key length into NULL so an empty key yields NULL
    -- instead of aborting the query with a divide-by-zero error.
    ,AVal / CAST(NULLIF(LEN(AKey), 0) AS DECIMAL(4,2)) AS NewValAsPortionOfKeyLength
FROM
    cteRecursive
这是我用的 Table 变量,如果你需要的话
-- Sample data for the queries in this answer: a table variable with an
-- auto-numbered Id, a key of one or more characters, and an integer value.
DECLARE @Table AS TABLE (
    Id   INT IDENTITY(1,1),
    AKey VARCHAR(100),
    AVal INT
)
INSERT INTO @Table (AKey, AVal)
VALUES
    ('AB', 94),
    ('Q', 48),
    ('Z', 56),
    ('AB', 12),
    ('T', 77),
    ('ghi', 100)
如果不需要拆分键本身,您实际上可以简化递归 CTE。通过使用 Level < LEN(AKey) 作为递归条件,
递归会在正确的位置停止,您不需要任何其他字符串操作。
-- Emit one row per character of each AKey in @Table without cutting the key
-- apart: [Level] simply counts 1..LEN(AKey) for every Id.
;WITH cteRecursive AS (
    -- Anchor member: one row per source row, at level 1.
    SELECT
        Id
        ,AKey
        ,AVal
        ,1 AS [Level]
    FROM
        @Table
    UNION ALL
    -- Recursive member: add the next level while it is still below the key length.
    SELECT
        t.Id
        ,t.AKey
        ,t.AVal
        ,c.[Level] + 1 AS [Level]
    FROM
        @Table t
        INNER JOIN cteRecursive c
            ON t.Id = c.Id
            AND c.[Level] < LEN(t.AKey)
)
SELECT
    Id
    ,AKey AS OriginalAKey
    ,AVal AS OriginalAVal
    ,AVal / 2.00 AS NewVal
    -- MAX([Level]) per Id is the number of rows produced for that key.
    ,AVal / CAST(MAX([Level]) OVER (PARTITION BY Id) AS DECIMAL(4,2)) AS NewValAsPortionOfLevel
    -- NULLIF turns a zero key length into NULL so an empty key yields NULL
    -- instead of aborting the query with a divide-by-zero error.
    ,AVal / CAST(NULLIF(LEN(AKey), 0) AS DECIMAL(4,2)) AS NewValAsPortionOfKeyLength
FROM
    cteRecursive
如果您的数据集非常大并且不想使用递归,还有另一种技术:构建一个计数表(Tally Table)来进行连接。我很想知道哪种方法性能更好。实际上,我在所用数据仓库的 ETL 中就有一个永久的计数表,用于某些记录操作;不过在这里您可能应该使用临时表而不是表变量。不管怎样,方法如下。
-- Tally-table alternative to the recursive CTE: store the numbers
-- 1..MAX(LEN(AKey)) once, then join so a key of length N matches N tally rows.
DECLARE @TallyTable AS TABLE (I INT)
DECLARE @MaxLen INT

SELECT @MaxLen = MAX(LEN(AKey)) FROM @Table

-- Count down from the longest key length to 1, storing each number.
-- A NULL or non-positive @MaxLen never enters the loop, so no separate
-- IF guard is required around it.
WHILE @MaxLen > 0
BEGIN
    INSERT INTO @TallyTable (I) VALUES (@MaxLen)
    SET @MaxLen = @MaxLen - 1
END

SELECT
    src.Id
    ,src.AKey
    ,src.AVal
    ,num.I
    -- Value divided by the key length; a zero/NULL length falls back to 1.
    ,CAST(src.AVal AS DECIMAL) / ISNULL(NULLIF(LEN(src.AKey),0),1) AS NewValueApportionedByLengthOfKey
FROM
    @Table src
    INNER JOIN @TallyTable num
        ON num.I <= LEN(src.AKey)
请注意,所有这些方法都假定 AKey 永远不会为 NULL 或长度为 0,但如果需要,所有这些方法都可以很容易地适应处理。