从单个串联值字符串中查找拆分成行的最低值和最高值
Find lowest and highest values split into rows from a single string of concatenated values
这是我之前一个问题的跟进:uzi 对那个问题给出了很好的回答。然而,我注意到一家新公司 Company3
也使用了单个数据点,例如帐户 6000,它不遵循以前公司的方式,这使得 uzi 的递归 cte 不适用。
因此,我觉得需要更改问题;但由于这一复杂情况对解决方案的影响很大,我认为应该另开一个新问题,而不是编辑我之前的问题。
我需要从 Excel 工作簿中读取数据,其中数据以这种方式存储:
Company Accounts
Company1 (#3000...#3999)
Company2 (#4000..#4019)+(#4021..#4024)
Company3 (#5000..#5001)+#6000+(#6005..#6010)
我认为,由于某些公司(例如 Company3)具有单值帐户(例如 #6000),我需要在这一步中创建如下所示的结果集:
Company FirstAcc LastAcc
Company1 3000 3999
Company2 4000 4019
Company2 4021 4024
Company3 5000 5001
Company3 6000 NULL
Company3 6005 6010
然后我将使用此表,并将其与一个仅含整数的表连接,以得到最终的表,其形式类似于我链接的那个问题中的表。
有人有什么想法吗?
我对 @uzi 针对另一个问题给出的解决方案做了一些修改:添加了另外三个 CTE,并使用了 LEAD() 和 ROW_NUMBER() 等窗口函数
来解决问题。我不知道是否有更简单的解决方案,但我认为这个方案效果不错。
-- Turns each company's concatenated account string into one row per range.
-- Output columns: company, accounts (first account of the range) and
-- ld (last account of the range; NULL for a stand-alone account such as #6000).
with cte as (
    -- Remove the grouping characters "(", ")" and "+", then append a trailing
    -- '#' so every token, including the last one, is enclosed by two '#'.
    select
        company,
        replace(replace(replace(accounts, '(', ''), ')', ''), '+', '') + '#' as accounts
    from
        (values
            ('company 1', '#3000..#3999'),
            ('company 2', '(#4000..#4019)+(#4021..#4024)'),
            ('company 3', '(#5000..#5001)+#6000+(#6005..#6010)')
        ) data(company, accounts)
)
, rcte as (
    -- Anchor: cut the first '#'-delimited token out of the string.
    -- acc      = remainder still to be processed
    -- accounts = extracted token (a range start keeps its trailing dots)
    -- pos      = 1-based position of the token within the company's string
    select
        company,
        stuff(accounts, ind1, ind2 - ind1, '') as acc,
        substring(accounts, ind1 + 1, ind2 - ind1 - 1) as accounts,
        1 as pos
    from
        cte
        cross apply (select charindex('#', accounts) ind1) ca
        cross apply (select charindex('#', accounts, ind1 + 1) ind2) cb
    union all
    -- Recursive step: repeat on the remainder until only the sentinel '#' is left.
    select
        company,
        stuff(acc, ind1, ind2 - ind1, ''),
        substring(acc, ind1 + 1, ind2 - ind1 - 1),
        pos + 1
    from
        rcte
        cross apply (select charindex('#', acc) ind1) ca
        cross apply (select charindex('#', acc, ind1 + 1) ind2) cb
    where
        len(acc) > 1
)
, tokens as (
    -- Classify every token by position instead of by string sort order:
    --   opens_range  = 1 when the token ends with a dot (it starts a range)
    --   closes_range = 1 when the previous token of the same company opened a range
    -- All dots are stripped, so both ".." and "..." separators are accepted.
    select
        company,
        pos,
        replace(accounts, '.', '') as accounts,
        case when accounts like '%.' then 1 else 0 end as opens_range,
        lead(replace(accounts, '.', ''))
            over (partition by company order by pos) as next_account,
        lag(case when accounts like '%.' then 1 else 0 end, 1, 0)
            over (partition by company order by pos) as closes_range
    from
        rcte
)
select
    company,
    accounts,
    -- Only a range start has an end; a stand-alone account gets NULL.
    case when opens_range = 1 then next_account end as ld
from
    tokens
where
    -- A token that closes a range is already reported as ld of the row before it.
    closes_range = 0
order by
    company,
    pos
option (maxrecursion 0)
结果:
一个好的 t-sql 拆分器功能使这变得非常简单;我建议 delimitedSplit8k。这也将比递归 CTE 表现得更好。首先是示例数据:
-- Sample data: one row per company holding its concatenated account list.
-- The temp table is dropped first so the script can be re-run in the same session.
if object_id('tempdb..#yourtable') is not null drop table #yourtable;
create table #yourtable (company varchar(100), accounts varchar(8000));
-- Explicit column list keeps the insert valid if columns are added or reordered.
insert into #yourtable (company, accounts)
values
    ('Company1', '(#3000...#3999)'),
    ('Company2', '(#4000..#4019)+(#4021..#4024)'),
    ('Company3', '(#5000..#5001)+#6000+(#6005..#6010)');
和解决方案:
-- One output row per '+'-separated group of each company:
--   firstAcc = first non-empty piece of the group
--   lastAcc  = later non-empty piece of the group, NULL for a stand-alone account
-- The first piece is identified by its position (ItemNumber, the 1-based ordinal
-- returned by DelimitedSplit8K) rather than by a trailing ')', so ranges written
-- without parentheses are handled as well.
select
    company,
    firstAcc = max(case when split2.ItemNumber = 1 then clean.item end),
    lastAcc  = max(case when split2.ItemNumber > 1 then clean.item end)
from #yourtable t
-- split1: one row per group, e.g. '(#5000..#5001)' or '#6000'
cross apply dbo.delimitedSplit8K(accounts, '+') split1
-- split2: pieces between dots; ".." and "..." produce empty pieces, filtered below
cross apply dbo.delimitedSplit8K(split1.Item, '.') split2
-- clean: the piece without its parentheses
cross apply (values (replace(replace(split2.Item, ')', ''), '(', ''))) clean(item)
where split2.Item > ''
-- Grouping on the group's position keeps identical groups of one company apart.
group by split1.ItemNumber, split1.Item, company;
结果:
company firstAcc lastAcc
--------- ---------- --------------
Company1 #3000 #3999
Company2 #4000 #4019
Company2 #4021 #4024
Company3 #6000 NULL
Company3 #5000 #5001
Company3 #6005 #6010
我认为列表 (#6005..#6010) 在您的 Excel 文件中表示为 #6005#6006#6007#6008#6009#6010。如果是这样并且没有间隙,请尝试此查询
-- Collapses explicit lists of consecutive account numbers (e.g. #6005#6006)
-- into FirstAcc / LastAcc ranges; a stand-alone account gets LastAcc = NULL.
with cte as (
    -- Remove the grouping characters "(", ")" and "+", then append a trailing
    -- '#' so every token, including the last one, is enclosed by two '#'.
    select
        company,
        replace(replace(replace(accounts, '(', ''), ')', ''), '+', '') + '#' as accounts
    from
        (values
            ('company 1', '#3000#3001#3002#3003'),
            ('company 2', '(#4000#4001)+(#4021#4022)'),
            ('company 3', '(#5000#5001)+#6000+(#6005#6006)')
        ) data(company, accounts)
)
, rcte as (
    -- Anchor: cut the first '#'-delimited token out of the string.
    -- acc = remainder still to be processed, accounts = extracted token.
    select
        company,
        stuff(accounts, ind1, ind2 - ind1, '') as acc,
        substring(accounts, ind1 + 1, ind2 - ind1 - 1) as accounts
    from
        cte
        cross apply (select charindex('#', accounts) ind1) ca
        cross apply (select charindex('#', accounts, ind1 + 1) ind2) cb
    union all
    -- Recursive step: repeat on the remainder until only the sentinel '#' is left.
    select
        company,
        stuff(acc, ind1, ind2 - ind1, ''),
        substring(acc, ind1 + 1, ind2 - ind1 - 1)
    from
        rcte
        cross apply (select charindex('#', acc) ind1) ca
        cross apply (select charindex('#', acc, ind1 + 1) ind2) cb
    where
        len(acc) > 1
)
, numbered as (
    -- Convert once, explicitly, so that sorting and min/max below are numeric
    -- rather than string comparisons.
    select
        company,
        cast(accounts as int) as account_no
    from
        rcte
)
, islands as (
    -- Within a run of consecutive numbers, value minus row number is constant;
    -- that constant (group_) identifies the run.
    select
        company,
        account_no,
        account_no - row_number() over (partition by company order by account_no) as group_
    from
        numbered
)
select
    company,
    min(account_no) as FirstAcc,
    -- A run of length one is a stand-alone account: report no upper bound.
    case when max(account_no) = min(account_no) then null else max(account_no) end as LastAcc
from
    islands
group by
    company,
    group_
option (maxrecursion 0)
您似乎标记了 SSIS,因此我将提供一个使用脚本任务的解决方案。所有其他示例都需要加载到暂存 table.
- 使用您的正常 reader(Excel 可能)并加载
- 添加脚本转换组件
- 编辑组件
- 输入列 - 勾选 Company 和 Accounts
- 输入和输出 - 添加一个新的输出并将其命名为 CompFirstLast
- 向其中添加三列 - Company(字符串)、First(整数)和 Last(整数)
打开脚本并粘贴以下代码
/// <summary>
/// Splits the incoming row's Accounts string on '+' and writes one
/// CompFirstLast output row per group, holding the company together with the
/// first and last account number of that group. Last is set to NULL when the
/// group contains a single account.
/// </summary>
/// <param name="Row">The current input row; Company and Accounts are read.</param>
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
    // A NULL Accounts value has nothing to split; emit no rows for it.
    if (Row.Accounts_IsNull)
        return;

    // Each '+'-separated group becomes one output row.
    foreach (string accountGroup in Row.Accounts.Split('+')) // single quotes: char overload
    {
        // Drop "(", ")" and "." so only a '#'-delimited list of numbers remains.
        string cleaned = accountGroup
            .Replace("(", String.Empty)
            .Replace(")", String.Empty)
            .Replace(".", String.Empty);

        // RemoveEmptyEntries discards the empty piece in front of the leading '#'.
        string[] accounts = cleaned.Split(new[] { '#' }, StringSplitOptions.RemoveEmptyEntries);

        // Skip empty groups (e.g. a trailing '+') instead of throwing.
        if (accounts.Length == 0)
            continue;

        // Parse before adding the row so a malformed number leaves no partial row.
        int first = int.Parse(accounts[0]);

        CompFirstLastBuffer.AddRow();
        CompFirstLastBuffer.Company = Row.Company; // same for every group of this input row
        CompFirstLastBuffer.First = first;

        if (accounts.Length == 1)
            CompFirstLastBuffer.Last_IsNull = true;
        else
            // Take the final element so lists longer than two still yield the last account.
            CompFirstLastBuffer.Last = int.Parse(accounts[accounts.Length - 1]);
    }
}
确保使用正确的输出。
这是我之前一个问题的跟进:uzi 对那个问题给出了很好的回答。然而,我注意到一家新公司 Company3
也使用了单个数据点,例如帐户 6000,它不遵循以前公司的方式,这使得 uzi 的递归 cte 不适用。
因此,我觉得需要更改问题;但由于这一复杂情况对解决方案的影响很大,我认为应该另开一个新问题,而不是编辑我之前的问题。
我需要从 Excel 工作簿中读取数据,其中数据以这种方式存储:
Company Accounts
Company1 (#3000...#3999)
Company2 (#4000..#4019)+(#4021..#4024)
Company3 (#5000..#5001)+#6000+(#6005..#6010)
我认为,由于某些公司(例如 Company3)具有单值帐户(例如 #6000),我需要在这一步中创建如下所示的结果集:
Company FirstAcc LastAcc
Company1 3000 3999
Company2 4000 4019
Company2 4021 4024
Company3 5000 5001
Company3 6000 NULL
Company3 6005 6010
然后我将使用此表,并将其与一个仅含整数的表连接,以得到最终的表,其形式类似于我链接的那个问题中的表。
有人有什么想法吗?
我对 @uzi 针对另一个问题给出的解决方案做了一些修改:添加了另外三个 CTE,并使用了 LEAD() 和 ROW_NUMBER() 等窗口函数
来解决问题。我不知道是否有更简单的解决方案,但我认为这个方案效果不错。
-- Turns each company's concatenated account string into one row per range.
-- Output columns: company, accounts (first account of the range) and
-- ld (last account of the range; NULL for a stand-alone account such as #6000).
with cte as (
    -- Remove the grouping characters "(", ")" and "+", then append a trailing
    -- '#' so every token, including the last one, is enclosed by two '#'.
    select
        company,
        replace(replace(replace(accounts, '(', ''), ')', ''), '+', '') + '#' as accounts
    from
        (values
            ('company 1', '#3000..#3999'),
            ('company 2', '(#4000..#4019)+(#4021..#4024)'),
            ('company 3', '(#5000..#5001)+#6000+(#6005..#6010)')
        ) data(company, accounts)
)
, rcte as (
    -- Anchor: cut the first '#'-delimited token out of the string.
    -- acc      = remainder still to be processed
    -- accounts = extracted token (a range start keeps its trailing dots)
    -- pos      = 1-based position of the token within the company's string
    select
        company,
        stuff(accounts, ind1, ind2 - ind1, '') as acc,
        substring(accounts, ind1 + 1, ind2 - ind1 - 1) as accounts,
        1 as pos
    from
        cte
        cross apply (select charindex('#', accounts) ind1) ca
        cross apply (select charindex('#', accounts, ind1 + 1) ind2) cb
    union all
    -- Recursive step: repeat on the remainder until only the sentinel '#' is left.
    select
        company,
        stuff(acc, ind1, ind2 - ind1, ''),
        substring(acc, ind1 + 1, ind2 - ind1 - 1),
        pos + 1
    from
        rcte
        cross apply (select charindex('#', acc) ind1) ca
        cross apply (select charindex('#', acc, ind1 + 1) ind2) cb
    where
        len(acc) > 1
)
, tokens as (
    -- Classify every token by position instead of by string sort order:
    --   opens_range  = 1 when the token ends with a dot (it starts a range)
    --   closes_range = 1 when the previous token of the same company opened a range
    -- All dots are stripped, so both ".." and "..." separators are accepted.
    select
        company,
        pos,
        replace(accounts, '.', '') as accounts,
        case when accounts like '%.' then 1 else 0 end as opens_range,
        lead(replace(accounts, '.', ''))
            over (partition by company order by pos) as next_account,
        lag(case when accounts like '%.' then 1 else 0 end, 1, 0)
            over (partition by company order by pos) as closes_range
    from
        rcte
)
select
    company,
    accounts,
    -- Only a range start has an end; a stand-alone account gets NULL.
    case when opens_range = 1 then next_account end as ld
from
    tokens
where
    -- A token that closes a range is already reported as ld of the row before it.
    closes_range = 0
order by
    company,
    pos
option (maxrecursion 0)
结果:
一个好的 t-sql 拆分器功能使这变得非常简单;我建议 delimitedSplit8k。这也将比递归 CTE 表现得更好。首先是示例数据:
-- Sample data: one row per company holding its concatenated account list.
-- The temp table is dropped first so the script can be re-run in the same session.
if object_id('tempdb..#yourtable') is not null drop table #yourtable;
create table #yourtable (company varchar(100), accounts varchar(8000));
-- Explicit column list keeps the insert valid if columns are added or reordered.
insert into #yourtable (company, accounts)
values
    ('Company1', '(#3000...#3999)'),
    ('Company2', '(#4000..#4019)+(#4021..#4024)'),
    ('Company3', '(#5000..#5001)+#6000+(#6005..#6010)');
和解决方案:
-- One output row per '+'-separated group of each company:
--   firstAcc = first non-empty piece of the group
--   lastAcc  = later non-empty piece of the group, NULL for a stand-alone account
-- The first piece is identified by its position (ItemNumber, the 1-based ordinal
-- returned by DelimitedSplit8K) rather than by a trailing ')', so ranges written
-- without parentheses are handled as well.
select
    company,
    firstAcc = max(case when split2.ItemNumber = 1 then clean.item end),
    lastAcc  = max(case when split2.ItemNumber > 1 then clean.item end)
from #yourtable t
-- split1: one row per group, e.g. '(#5000..#5001)' or '#6000'
cross apply dbo.delimitedSplit8K(accounts, '+') split1
-- split2: pieces between dots; ".." and "..." produce empty pieces, filtered below
cross apply dbo.delimitedSplit8K(split1.Item, '.') split2
-- clean: the piece without its parentheses
cross apply (values (replace(replace(split2.Item, ')', ''), '(', ''))) clean(item)
where split2.Item > ''
-- Grouping on the group's position keeps identical groups of one company apart.
group by split1.ItemNumber, split1.Item, company;
结果:
company firstAcc lastAcc
--------- ---------- --------------
Company1 #3000 #3999
Company2 #4000 #4019
Company2 #4021 #4024
Company3 #6000 NULL
Company3 #5000 #5001
Company3 #6005 #6010
我认为列表 (#6005..#6010) 在您的 Excel 文件中表示为 #6005#6006#6007#6008#6009#6010。如果是这样并且没有间隙,请尝试此查询
-- Collapses explicit lists of consecutive account numbers (e.g. #6005#6006)
-- into FirstAcc / LastAcc ranges; a stand-alone account gets LastAcc = NULL.
with cte as (
    -- Remove the grouping characters "(", ")" and "+", then append a trailing
    -- '#' so every token, including the last one, is enclosed by two '#'.
    select
        company,
        replace(replace(replace(accounts, '(', ''), ')', ''), '+', '') + '#' as accounts
    from
        (values
            ('company 1', '#3000#3001#3002#3003'),
            ('company 2', '(#4000#4001)+(#4021#4022)'),
            ('company 3', '(#5000#5001)+#6000+(#6005#6006)')
        ) data(company, accounts)
)
, rcte as (
    -- Anchor: cut the first '#'-delimited token out of the string.
    -- acc = remainder still to be processed, accounts = extracted token.
    select
        company,
        stuff(accounts, ind1, ind2 - ind1, '') as acc,
        substring(accounts, ind1 + 1, ind2 - ind1 - 1) as accounts
    from
        cte
        cross apply (select charindex('#', accounts) ind1) ca
        cross apply (select charindex('#', accounts, ind1 + 1) ind2) cb
    union all
    -- Recursive step: repeat on the remainder until only the sentinel '#' is left.
    select
        company,
        stuff(acc, ind1, ind2 - ind1, ''),
        substring(acc, ind1 + 1, ind2 - ind1 - 1)
    from
        rcte
        cross apply (select charindex('#', acc) ind1) ca
        cross apply (select charindex('#', acc, ind1 + 1) ind2) cb
    where
        len(acc) > 1
)
, numbered as (
    -- Convert once, explicitly, so that sorting and min/max below are numeric
    -- rather than string comparisons.
    select
        company,
        cast(accounts as int) as account_no
    from
        rcte
)
, islands as (
    -- Within a run of consecutive numbers, value minus row number is constant;
    -- that constant (group_) identifies the run.
    select
        company,
        account_no,
        account_no - row_number() over (partition by company order by account_no) as group_
    from
        numbered
)
select
    company,
    min(account_no) as FirstAcc,
    -- A run of length one is a stand-alone account: report no upper bound.
    case when max(account_no) = min(account_no) then null else max(account_no) end as LastAcc
from
    islands
group by
    company,
    group_
option (maxrecursion 0)
您似乎标记了 SSIS,因此我将提供一个使用脚本任务的解决方案。所有其他示例都需要加载到暂存 table.
- 使用您的正常 reader(Excel 可能)并加载
- 添加脚本转换组件
- 编辑组件
- 输入列 - 勾选 Company 和 Accounts
- 输入和输出 - 添加一个新的输出并将其命名为 CompFirstLast
- 向其中添加三列 - Company(字符串)、First(整数)和 Last(整数)
打开脚本并粘贴以下代码
/// <summary>
/// Splits the incoming row's Accounts string on '+' and writes one
/// CompFirstLast output row per group, holding the company together with the
/// first and last account number of that group. Last is set to NULL when the
/// group contains a single account.
/// </summary>
/// <param name="Row">The current input row; Company and Accounts are read.</param>
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
    // A NULL Accounts value has nothing to split; emit no rows for it.
    if (Row.Accounts_IsNull)
        return;

    // Each '+'-separated group becomes one output row.
    foreach (string accountGroup in Row.Accounts.Split('+')) // single quotes: char overload
    {
        // Drop "(", ")" and "." so only a '#'-delimited list of numbers remains.
        string cleaned = accountGroup
            .Replace("(", String.Empty)
            .Replace(")", String.Empty)
            .Replace(".", String.Empty);

        // RemoveEmptyEntries discards the empty piece in front of the leading '#'.
        string[] accounts = cleaned.Split(new[] { '#' }, StringSplitOptions.RemoveEmptyEntries);

        // Skip empty groups (e.g. a trailing '+') instead of throwing.
        if (accounts.Length == 0)
            continue;

        // Parse before adding the row so a malformed number leaves no partial row.
        int first = int.Parse(accounts[0]);

        CompFirstLastBuffer.AddRow();
        CompFirstLastBuffer.Company = Row.Company; // same for every group of this input row
        CompFirstLastBuffer.First = first;

        if (accounts.Length == 1)
            CompFirstLastBuffer.Last_IsNull = true;
        else
            // Take the final element so lists longer than two still yield the last account.
            CompFirstLastBuffer.Last = int.Parse(accounts[accounts.Length - 1]);
    }
}
确保使用正确的输出。