报告重复数据

Report duplicate data

-- Sample table for the duplicate-data report: each row is one (cat, num) pair.
create table dupt
(
    cat varchar(10),
    num int
);

-- Sample rows, one category per line.
-- A and C hold the same num values (1, 2, 3); B, E and F hold the same
-- num values (1, 2); D has three rows but contains 4 where A and C contain 3.
insert into dupt (cat, num)
values
    ('A', 1), ('A', 2), ('A', 3),
    ('B', 1), ('B', 2),
    ('C', 1), ('C', 2), ('C', 3),
    ('D', 1), ('D', 2), ('D', 4),
    ('E', 1), ('E', 2),
    ('F', 1), ('F', 2);

我需要创建一个报告来找出重复数据。根据上面的示例数据,报告需要显示 cat A 的数据与 cat C 重复(注意 num 值和记录数都必须相同),cat B 的数据与 cat E 和 F 重复。实现这一点的最佳方式是什么?

示例输出

-------------
|cat | dupby|
-------------
| A  |  C   |
| B  | E, F |
-------------

更新:改用传统的集合匹配(set matching),通过 common table expression 实现;仅在最终结果上使用 stuff() 配合 select ... for xml path ('') 的字符串拼接方法:

-- Report categories whose data is duplicated by other categories.
-- Two categories count as duplicates when they have the same number of rows
-- and every num value of one is matched by a num value of the other.
-- Output: one row per category that is not itself listed as a duplicate of an
-- earlier (smaller cat value) category, with a comma-separated list of the
-- categories that duplicate it.
-- NOTE(review): the matching assumes (cat, num) pairs are unique within dupt;
-- repeated pairs would multiply the joined rows and break the count check.
;with cte as (
  -- every row plus the total number of rows in its category
  select
        cat
      , num
      , cnt = count(*) over (partition by cat)
  from dupt
)
, duplicates as (
  -- pair each category with every later category of the same size; the pair
  -- is kept only when the number of matching num values equals that size
  select 
        x.cat
      , dup_cat = x2.cat
  from cte as x 
    inner join cte as x2
       on x.cat < x2.cat
      and x.num = x2.num
      and x.cnt = x2.cnt 
  group by x.cat, x2.cat, x.cnt
  having count(*) = x.cnt
)
select 
    d.cat
  , dupby = stuff((
        select ', '+i.dup_cat
        from duplicates i
        where i.cat = d.cat
        order by i.dup_cat  -- without this the concatenation order is not guaranteed
        for xml path (''), type).value('.','varchar(8000)')
     ,1,2,'')  -- stuff() strips the leading ', ' separator
from duplicates d
-- skip categories that already appear as a duplicate of an earlier category
where not exists (
    select 1
    from duplicates i
    where d.cat = i.dup_cat
    )
group by d.cat
order by d.cat

rextester 演示:http://rextester.com/KHAG98718

returns:

+-----+-------+
| cat | dupby |
+-----+-------+
| A   | C     |
| B   | E, F  |
+-----+-------+