在重复项中选择不同的连续行
Selecting distinct consecutive rows amongst duplicates
我有一个 table 结构如下:
-- Sample table for the question: one row per roster entry, keyed only by date.
-- NOTE(review): the inserts below supply a time of day and the expected output
-- shows it, but the column is declared as DATE - confirm whether DATETIME was
-- intended.
CREATE TABLE roster
(
    date   DATE         NOT NULL,
    first  NVARCHAR(20) NOT NULL,
    second NVARCHAR(20) NOT NULL,
    third  NVARCHAR(20) NOT NULL,
    fourth NVARCHAR(20) NOT NULL
)
GO
并且插入了以下数据:
-- Sample data. Rows flagged with "*" repeat the (first, second, third, fourth)
-- combination of the row immediately before them.
-- The "*" markers are kept as comments so the script stays executable, and the
-- column list is spelled out so the insert does not depend on column order.
INSERT INTO roster (date, first, second, third, fourth)
VALUES
    ('2015-06-10 12:45:34', 'e', 'm', 'a', 'r'),
    ('2015-06-11 12:45:34', 'e', 'v', 'a', 'r'),
    ('2015-06-12 12:45:34', 'e', 'm', 'a', 'd'),
    ('2015-06-13 12:45:34', 'e', 'm', 'a', 'd'),  -- *
    ('2015-06-14 12:45:34', 'e', 'm', 'a', 'r'),
    ('2015-06-15 12:45:34', 'e', 'm', 'a', 'r'),  -- *
    ('2015-06-16 12:45:34', 'z', 'm', 't', 'r');
注意:* 标记重复。
如何对 "first"、"second"、"third" 和 "fourth" 的每一段连续重复的组合只 select 一行?例如,对于上面插入的数据,期望的输出是:
Date First Second Third Fourth
2015-06-10 12:45:34, e m a r
2015-06-11 12:45:34, e v a r
2015-06-12 12:45:34, e m a d
2015-06-14 12:45:34, e m a r
2015-06-16 12:45:34, z m t r
我正在寻找一种解决方案,当条目不再连续时(或序列被破坏时)保留条目,但删除连续条目的重复项。
我在这里看到过类似的问题,但一直没能用 group by 写出可行的解决方案。
如有任何帮助,我们将不胜感激
您可以按第一、第二、第三、第四的值分组
然后 select 第一次遇到这些值的日期是 min(date) 或最后一次出现是 max(date)
最后一次遇到的日期示例:fiddle
-- One row per distinct (first, second, third, fourth) combination, with the
-- earliest and latest date on which it appears. Grouping is by value only, so
-- separate runs of the same combination fall into the same group.
SELECT
    MIN(r.date) AS startdate,
    MAX(r.date) AS enddate,
    r.first,
    r.second,
    r.third,
    r.fourth
FROM roster AS r
GROUP BY
    r.first,
    r.second,
    r.third,
    r.fourth;
编辑:编辑了之前的查询以包括开始和结束日期
额外:我在等待您的回复时正在玩的东西:包括值出现在 1 个字段中的日期列表:
-- For each distinct (first, second, third, fourth) combination, build a
-- comma-separated list of every date on which it appears, in date order.
SELECT
    grp.first,
    grp.second,
    grp.third,
    grp.fourth,
    STUFF(
        (
            -- Concatenate ",<date>" for every row that shares the combination.
            SELECT ',' + CONVERT(varchar(25), member.date)
            FROM roster AS member
            WHERE member.first = grp.first
              AND member.second = grp.second
              AND member.third = grp.third
              AND member.fourth = grp.fourth
            ORDER BY member.date
            FOR XML PATH('')
        ),
        1, 1, ''  -- remove the leading comma
    ) AS dates
FROM roster AS grp
GROUP BY
    grp.first,
    grp.second,
    grp.third,
    grp.fourth;
编辑:下面的查询已经非常接近你想要的结果,但还不完全正确;我不知道如何进一步改进,所以我能做的就到这里,剩下的留给其他人 :D :SQLFIDDLE
-- Number the rows after sorting by combination and then by date, so equal
-- combinations sit next to each other.
;WITH ordered AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY first, second, third, fourth, date) AS rn,
        date,
        first,
        second,
        third,
        fourth
    FROM roster
)
-- Pair each row with the row directly before it in that ordering when both
-- carry the same combination: the earlier date is startdate, the later enddate.
SELECT
    prev.date AS startdate,
    cur.date  AS enddate,
    cur.first,
    cur.second,
    cur.third,
    cur.fourth
FROM ordered AS cur
INNER JOIN ordered AS prev
    ON  cur.rn = prev.rn + 1
    AND cur.first = prev.first
    AND cur.second = prev.second
    AND cur.third = prev.third
    AND cur.fourth = prev.fourth
UNION
-- Combinations that occur exactly once have no pair; emit them with no enddate.
SELECT
    MAX(date) AS startdate,
    NULL AS enddate,
    first,
    second,
    third,
    fourth
FROM roster
GROUP BY
    first,
    second,
    third,
    fourth
HAVING COUNT(*) = 1;
如果只需要一个 exists(),为什么还要用 group by?
-- Number the rows in date order so each row can be compared with the one
-- immediately before it.
;WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY date) AS number,
        *
    FROM roster
)
-- Keep a row only when the previous row (number - 1) does not carry the same
-- (first, second, third, fourth) combination.
SELECT cur.*
FROM numbered AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM numbered AS prev
    WHERE prev.number + 1 = cur.number
      AND prev.first = cur.first
      AND prev.second = cur.second
      AND prev.third = cur.third
      AND prev.fourth = cur.fourth
);
编辑
-- Gaps-and-islands: collapse each run of consecutive identical
-- (first, second, third, fourth) rows into one row with its start date,
-- end date and row count.
;WITH data AS (
    SELECT
        -- Position of the row in the whole table, in date order.
        ROW_NUMBER() OVER (ORDER BY date) AS number,
        -- Position of the row among rows with the same combination.
        ROW_NUMBER() OVER (PARTITION BY first, second, third, fourth ORDER BY date) AS part,
        *
    FROM roster
)
SELECT
    MIN(date) AS startdate,
    MAX(date) AS enddate,
    COUNT(*) AS count,
    first,
    second,
    third,
    fourth
FROM data
-- number - part stays constant while the same combination repeats on
-- consecutive rows, so together with the combination it identifies one run.
GROUP BY
    first,
    second,
    third,
    fourth,
    number - part
-- Order by the start of each run. number - part is not chronological across
-- different combinations (for A,A,A,B,A the runs get 0, 3, 1) and can tie
-- between runs, so sorting on it gives a misleading, non-deterministic order.
ORDER BY MIN(date);
我有一个 table 结构如下:
-- Sample table for the question: one row per roster entry, keyed only by date.
-- NOTE(review): the inserts below supply a time of day and the expected output
-- shows it, but the column is declared as DATE - confirm whether DATETIME was
-- intended.
CREATE TABLE roster
(
    date   DATE         NOT NULL,
    first  NVARCHAR(20) NOT NULL,
    second NVARCHAR(20) NOT NULL,
    third  NVARCHAR(20) NOT NULL,
    fourth NVARCHAR(20) NOT NULL
)
GO
并且插入了以下数据:
-- Sample data. Rows flagged with "*" repeat the (first, second, third, fourth)
-- combination of the row immediately before them.
-- The "*" markers are kept as comments so the script stays executable, and the
-- column list is spelled out so the insert does not depend on column order.
INSERT INTO roster (date, first, second, third, fourth)
VALUES
    ('2015-06-10 12:45:34', 'e', 'm', 'a', 'r'),
    ('2015-06-11 12:45:34', 'e', 'v', 'a', 'r'),
    ('2015-06-12 12:45:34', 'e', 'm', 'a', 'd'),
    ('2015-06-13 12:45:34', 'e', 'm', 'a', 'd'),  -- *
    ('2015-06-14 12:45:34', 'e', 'm', 'a', 'r'),
    ('2015-06-15 12:45:34', 'e', 'm', 'a', 'r'),  -- *
    ('2015-06-16 12:45:34', 'z', 'm', 't', 'r');
注意:* 标记重复。
如何对 "first"、"second"、"third" 和 "fourth" 的每一段连续重复的组合只 select 一行?例如,对于上面插入的数据,期望的输出是:
Date First Second Third Fourth
2015-06-10 12:45:34, e m a r
2015-06-11 12:45:34, e v a r
2015-06-12 12:45:34, e m a d
2015-06-14 12:45:34, e m a r
2015-06-16 12:45:34, z m t r
我正在寻找一种解决方案,当条目不再连续时(或序列被破坏时)保留条目,但删除连续条目的重复项。
我在这里看到过类似的问题,但一直没能用 group by 写出可行的解决方案。
如有任何帮助,我们将不胜感激
您可以按第一、第二、第三、第四的值分组 然后 select 第一次遇到这些值的日期是 min(date) 或最后一次出现是 max(date)
最后一次遇到的日期示例:fiddle
-- One row per distinct (first, second, third, fourth) combination, with the
-- earliest and latest date on which it appears. Grouping is by value only, so
-- separate runs of the same combination fall into the same group.
SELECT
    MIN(r.date) AS startdate,
    MAX(r.date) AS enddate,
    r.first,
    r.second,
    r.third,
    r.fourth
FROM roster AS r
GROUP BY
    r.first,
    r.second,
    r.third,
    r.fourth;
编辑:编辑了之前的查询以包括开始和结束日期
额外:我在等待您的回复时正在玩的东西:包括值出现在 1 个字段中的日期列表:
-- For each distinct (first, second, third, fourth) combination, build a
-- comma-separated list of every date on which it appears, in date order.
SELECT
    grp.first,
    grp.second,
    grp.third,
    grp.fourth,
    STUFF(
        (
            -- Concatenate ",<date>" for every row that shares the combination.
            SELECT ',' + CONVERT(varchar(25), member.date)
            FROM roster AS member
            WHERE member.first = grp.first
              AND member.second = grp.second
              AND member.third = grp.third
              AND member.fourth = grp.fourth
            ORDER BY member.date
            FOR XML PATH('')
        ),
        1, 1, ''  -- remove the leading comma
    ) AS dates
FROM roster AS grp
GROUP BY
    grp.first,
    grp.second,
    grp.third,
    grp.fourth;
编辑:下面的查询已经非常接近你想要的结果,但还不完全正确;我不知道如何进一步改进,所以我能做的就到这里,剩下的留给其他人 :D :SQLFIDDLE
-- Number the rows after sorting by combination and then by date, so equal
-- combinations sit next to each other.
;WITH ordered AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY first, second, third, fourth, date) AS rn,
        date,
        first,
        second,
        third,
        fourth
    FROM roster
)
-- Pair each row with the row directly before it in that ordering when both
-- carry the same combination: the earlier date is startdate, the later enddate.
SELECT
    prev.date AS startdate,
    cur.date  AS enddate,
    cur.first,
    cur.second,
    cur.third,
    cur.fourth
FROM ordered AS cur
INNER JOIN ordered AS prev
    ON  cur.rn = prev.rn + 1
    AND cur.first = prev.first
    AND cur.second = prev.second
    AND cur.third = prev.third
    AND cur.fourth = prev.fourth
UNION
-- Combinations that occur exactly once have no pair; emit them with no enddate.
SELECT
    MAX(date) AS startdate,
    NULL AS enddate,
    first,
    second,
    third,
    fourth
FROM roster
GROUP BY
    first,
    second,
    third,
    fourth
HAVING COUNT(*) = 1;
如果只需要一个 exists(),为什么还要用 group by?
-- Number the rows in date order so each row can be compared with the one
-- immediately before it.
;WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY date) AS number,
        *
    FROM roster
)
-- Keep a row only when the previous row (number - 1) does not carry the same
-- (first, second, third, fourth) combination.
SELECT cur.*
FROM numbered AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM numbered AS prev
    WHERE prev.number + 1 = cur.number
      AND prev.first = cur.first
      AND prev.second = cur.second
      AND prev.third = cur.third
      AND prev.fourth = cur.fourth
);
编辑
-- Gaps-and-islands: collapse each run of consecutive identical
-- (first, second, third, fourth) rows into one row with its start date,
-- end date and row count.
;WITH data AS (
    SELECT
        -- Position of the row in the whole table, in date order.
        ROW_NUMBER() OVER (ORDER BY date) AS number,
        -- Position of the row among rows with the same combination.
        ROW_NUMBER() OVER (PARTITION BY first, second, third, fourth ORDER BY date) AS part,
        *
    FROM roster
)
SELECT
    MIN(date) AS startdate,
    MAX(date) AS enddate,
    COUNT(*) AS count,
    first,
    second,
    third,
    fourth
FROM data
-- number - part stays constant while the same combination repeats on
-- consecutive rows, so together with the combination it identifies one run.
GROUP BY
    first,
    second,
    third,
    fourth,
    number - part
-- Order by the start of each run. number - part is not chronological across
-- different combinations (for A,A,A,B,A the runs get 0, 3, 1) and can tie
-- between runs, so sorting on it gives a misleading, non-deterministic order.
ORDER BY MIN(date);