SQL 与分组相交
SQL intersect with group by
给定这两个表(集合),它们各自包含不同的项目分组:如何找出 set1 中哪些组跨越了 set2 中的多个组?换句话说,如何找出 set1 中无法被 set2 中任何单个组完全覆盖的组?
例如,对于下表,A (1,2,5) 是唯一同时跨越 s1 (1,2,3) 和 s2 (2,3,4,5) 的组。B 和 C 不是答案,因为它们各自都完全包含在 s2 这一个组中。
我更希望用 SQL 来实现(可以使用 SQL Server 2008 R2)。
谢谢。
set1 set2
+---------+----------+ +---------+----------+
| group | item | | group | item |
+---------+----------+ +---------+----------+
| A | 1 | | s1 | 1 |
| A | 2 | | s1 | 2 |
| A | 5 | | s1 | 3 |
| B | 4 | | s2 | 2 |
| B | 5 | | s2 | 3 |
| C | 3 | | s2 | 4 |
| C | 5 | | s2 | 5 |
+---------+----------+ +---------+----------+
使用这个 sqlfiddle 试试:http://sqlfiddle.com/#!6/fac8a/3
或者使用下面的脚本生成临时表来尝试答案:
-- Sample data from the question, loaded into temporary tables.
-- grp is the group label; item is the id of an item belonging to that group.
create table #set1 (grp varchar(5), item int)
create table #set2 (grp varchar(5), item int)

-- Explicit column lists plus row constructors (supported since SQL Server 2008)
-- replace the chained "select ... union select ..." inserts.
insert into #set1 (grp, item)
values ('a', 1), ('a', 2), ('a', 5),
       ('b', 4), ('b', 5),
       ('c', 3), ('c', 5)

insert into #set2 (grp, item)
values ('s1', 1), ('s1', 2), ('s1', 3),
       ('s2', 2), ('s2', 3), ('s2', 4), ('s2', 5)

-- Show the loaded rows.
select grp, item from #set1
select grp, item from #set2

-- Uncomment to clean up when finished:
--drop table #set1
--drop table #set2
选出 set1 中这样的组:set2 中不存在任何一个组能够包含该组的全部项目:
-- Keep a set1 group only when no set2 group covers it.
-- A set2 group covers a set1 group when removing its items (EXCEPT) from the
-- set1 group's items leaves an empty result.
select distinct outer_grp.grp
from set1 outer_grp
where not exists (
    -- a set2 group that covers the current set1 group
    select *
    from set2 candidate
    where not exists (
        -- items of the set1 group that the candidate set2 group lacks
        select grp_items.item
        from set1 grp_items
        where grp_items.grp = outer_grp.grp
        except
        select cand_items.item
        from set2 cand_items
        where cand_items.grp = candidate.grp
    )
)
您可以通过以下查询找到解决方案:
-- Lists the set2 rows whose item does not occur anywhere in set1.
-- The first two columns are always NULL because only unmatched rows are kept.
-- Uses the grp column from the setup script; GROUP is a reserved word in T-SQL
-- and cannot be used as a bare column name.
-- NOTE(review): this is an anti-join on item only; on its own it does not identify
-- set1 groups that span several set2 groups.
SELECT
    s1.grp  AS G1,
    s1.item AS T1,
    s2.grp,
    s2.item
FROM set2 AS s2
LEFT JOIN set1 AS s1
    ON s1.item = s2.item
WHERE s1.grp IS NULL
请检查一下下面的查询是否可行:
-- For every set1 group, count the distinct set2 groups that share at least one
-- item with it, then list the rows of each set1 group whose count exceeds one.
-- NOTE(review): any overlap is counted, so a set1 group that is fully contained in
-- one set2 group but also touches another is returned as well.
-- NOTE(review): assumes the group column is named Group1; the setup script and the
-- other snippets use grp -- confirm against the actual schema.
WITH group_pairs AS (
    -- one row per (set1 group, set2 group) pair sharing an item
    SELECT DISTINCT
        s1.Group1 AS GroupA,
        s2.Group1 AS GroupB
    FROM SET1 AS s1
    INNER JOIN SET2 AS s2
        ON s1.Item = s2.Item
),
overlap_counts AS (
    -- number of set2 groups touched by each set1 group
    SELECT
        GroupA,
        COUNT(*) AS CNT
    FROM group_pairs
    GROUP BY GroupA
)
SELECT DISTINCT
    src.Group1,
    src.Item,
    oc.CNT
FROM SET1 AS src
INNER JOIN overlap_counts AS oc
    ON src.Group1 = oc.GroupA
WHERE oc.CNT > 1
感谢您的评论。我相信以下编辑后的查询会起作用:
-- Reports a set1 group when even its best-matching set2 group leaves some of the
-- group's items unmatched (max matched count < number of distinct items).
with group_sizes as (
    -- number of distinct items in each set1 group
    select grp, count(distinct Item) as InitialRows
    from set1
    group by grp
),
pair_matches as (
    -- distinct set1 items matched per (set1 group, set2 group) pair; the left join
    -- keeps set1 rows without a match, which contribute nothing to the count
    select
        a.grp as grp1,
        b.grp as grp2,
        count(distinct case when b.item is not null then a.item end) as MatchedRows,
        d.InitialRows
    from set1 a
    left join set2 b
        on a.item = b.item
    left join group_sizes d
        on a.grp = d.grp
    group by a.grp, b.grp, d.InitialRows
)
select grp1, initialRows, max(MatchedRows)
from pair_matches
group by grp1, InitialRows
having max(MatchedRows) < InitialRows
好的。这个写法很丑,但应该有效,我已经在 fiddle 上试过了。我觉得可以用窗口函数(windowing)来完成,不过我还得再想一想。
下面是目前这个不太优雅的版本:
-- Returns the set1 groups for which no single set2 group matches as many items
-- as the set1 group contains.
WITH set1_sizes AS (
    -- row count of each set1 group
    SELECT
        set1.grp,
        COUNT(*) AS cnt
    FROM set1
    GROUP BY set1.grp
),
pair_overlap AS (
    -- number of matching items for each (set1 group, set2 group) pair
    SELECT
        set1.grp AS grp1,
        set2.grp AS grp2,
        COUNT(set1.item) AS cnt
    FROM set1
    INNER JOIN set2
        ON set1.item = set2.item
    GROUP BY
        set1.grp,
        set2.grp
)
SELECT sz.grp
FROM set1_sizes AS sz
WHERE NOT EXISTS (
    -- a set2 group whose match count equals the full size of the set1 group
    SELECT 1
    FROM pair_overlap AS po
    WHERE po.grp1 = sz.grp
      AND po.cnt = sz.cnt
)
我认为这样可行。子查询针对每个 set1 组返回那些匹配了该组全部项目的 set2 组:做法是统计匹配项的数量,并将其与该 set1 组的项目数量进行比较。
-- Returns each #set1 group for which the correlated subquery finds no #set2 group
-- whose matched-item count equals the count taken over the outer group's items.
select s.grp from #set1 s
group by s.grp
having not exists (
-- One candidate row per #set2 group that shares items with the current #set1 group.
select s2.grp from #set2 s2 inner join #set1 s1 on s2.item = s1.item
where s1.grp = s.grp
group by s2.grp
-- NOTE(review): count(s.item) aggregates a column of the OUTER query; presumably it
-- yields the item count of the current outer group, which is then compared with the
-- number of items matched in this #set2 group -- confirm on the target server.
having count(s.item) = count(s2.item)
)
与 Robert Co 的答案基本相同。
我并不是从他的回答中得到这个思路的——这是我独立想出来的。
-- Start from every set1 group, then remove (EXCEPT) the groups that some single
-- set2 group covers completely; what remains needs more than one set2 group.
-- Uses the grp column from the setup script; "group" is a reserved word in T-SQL
-- and cannot be used as a bare identifier.
select set1.grp
from set1
except
select set1count.grp
from (
    -- number of items in each set1 group
    select set1.grp, count(*) as item_count
    from set1
    group by set1.grp
) as set1count
inner join (
    -- number of items each set1 group shares with each set2 group; grouping by
    -- both group columns keeps the set2 groups separate
    select set1.grp as grp1, count(*) as item_count
    from set1
    inner join set2
        on set2.item = set1.item
    group by set1.grp, set2.grp
) as set12count
    on set1count.grp = set12count.grp1                -- deliberately no set2 group in the join
    and set1count.item_count = set12count.item_count  -- all items of the set1 group are in one set2 group
给定这两个表(集合),它们各自包含不同的项目分组:如何找出 set1 中哪些组跨越了 set2 中的多个组?换句话说,如何找出 set1 中无法被 set2 中任何单个组完全覆盖的组?
例如,对于下表,A (1,2,5) 是唯一同时跨越 s1 (1,2,3) 和 s2 (2,3,4,5) 的组。B 和 C 不是答案,因为它们各自都完全包含在 s2 这一个组中。
我更希望用 SQL 来实现(可以使用 SQL Server 2008 R2)。
谢谢。
set1 set2
+---------+----------+ +---------+----------+
| group | item | | group | item |
+---------+----------+ +---------+----------+
| A | 1 | | s1 | 1 |
| A | 2 | | s1 | 2 |
| A | 5 | | s1 | 3 |
| B | 4 | | s2 | 2 |
| B | 5 | | s2 | 3 |
| C | 3 | | s2 | 4 |
| C | 5 | | s2 | 5 |
+---------+----------+ +---------+----------+
使用这个 sqlfiddle 试试:http://sqlfiddle.com/#!6/fac8a/3
或者使用下面的脚本生成临时表来尝试答案:
-- Sample data from the question, loaded into temporary tables.
-- grp is the group label; item is the id of an item belonging to that group.
create table #set1 (grp varchar(5), item int)
create table #set2 (grp varchar(5), item int)

-- Explicit column lists plus row constructors (supported since SQL Server 2008)
-- replace the chained "select ... union select ..." inserts.
insert into #set1 (grp, item)
values ('a', 1), ('a', 2), ('a', 5),
       ('b', 4), ('b', 5),
       ('c', 3), ('c', 5)

insert into #set2 (grp, item)
values ('s1', 1), ('s1', 2), ('s1', 3),
       ('s2', 2), ('s2', 3), ('s2', 4), ('s2', 5)

-- Show the loaded rows.
select grp, item from #set1
select grp, item from #set2

-- Uncomment to clean up when finished:
--drop table #set1
--drop table #set2
选出 set1 中这样的组:set2 中不存在任何一个组能够包含该组的全部项目:
-- Keep a set1 group only when no set2 group covers it.
-- A set2 group covers a set1 group when removing its items (EXCEPT) from the
-- set1 group's items leaves an empty result.
select distinct outer_grp.grp
from set1 outer_grp
where not exists (
    -- a set2 group that covers the current set1 group
    select *
    from set2 candidate
    where not exists (
        -- items of the set1 group that the candidate set2 group lacks
        select grp_items.item
        from set1 grp_items
        where grp_items.grp = outer_grp.grp
        except
        select cand_items.item
        from set2 cand_items
        where cand_items.grp = candidate.grp
    )
)
您可以通过以下查询找到解决方案:
-- Lists the set2 rows whose item does not occur anywhere in set1.
-- The first two columns are always NULL because only unmatched rows are kept.
-- Uses the grp column from the setup script; GROUP is a reserved word in T-SQL
-- and cannot be used as a bare column name.
-- NOTE(review): this is an anti-join on item only; on its own it does not identify
-- set1 groups that span several set2 groups.
SELECT
    s1.grp  AS G1,
    s1.item AS T1,
    s2.grp,
    s2.item
FROM set2 AS s2
LEFT JOIN set1 AS s1
    ON s1.item = s2.item
WHERE s1.grp IS NULL
请检查一下下面的查询是否可行:
-- For every set1 group, count the distinct set2 groups that share at least one
-- item with it, then list the rows of each set1 group whose count exceeds one.
-- NOTE(review): any overlap is counted, so a set1 group that is fully contained in
-- one set2 group but also touches another is returned as well.
-- NOTE(review): assumes the group column is named Group1; the setup script and the
-- other snippets use grp -- confirm against the actual schema.
WITH group_pairs AS (
    -- one row per (set1 group, set2 group) pair sharing an item
    SELECT DISTINCT
        s1.Group1 AS GroupA,
        s2.Group1 AS GroupB
    FROM SET1 AS s1
    INNER JOIN SET2 AS s2
        ON s1.Item = s2.Item
),
overlap_counts AS (
    -- number of set2 groups touched by each set1 group
    SELECT
        GroupA,
        COUNT(*) AS CNT
    FROM group_pairs
    GROUP BY GroupA
)
SELECT DISTINCT
    src.Group1,
    src.Item,
    oc.CNT
FROM SET1 AS src
INNER JOIN overlap_counts AS oc
    ON src.Group1 = oc.GroupA
WHERE oc.CNT > 1
感谢您的评论。我相信以下编辑后的查询会起作用:
-- Reports a set1 group when even its best-matching set2 group leaves some of the
-- group's items unmatched (max matched count < number of distinct items).
with group_sizes as (
    -- number of distinct items in each set1 group
    select grp, count(distinct Item) as InitialRows
    from set1
    group by grp
),
pair_matches as (
    -- distinct set1 items matched per (set1 group, set2 group) pair; the left join
    -- keeps set1 rows without a match, which contribute nothing to the count
    select
        a.grp as grp1,
        b.grp as grp2,
        count(distinct case when b.item is not null then a.item end) as MatchedRows,
        d.InitialRows
    from set1 a
    left join set2 b
        on a.item = b.item
    left join group_sizes d
        on a.grp = d.grp
    group by a.grp, b.grp, d.InitialRows
)
select grp1, initialRows, max(MatchedRows)
from pair_matches
group by grp1, InitialRows
having max(MatchedRows) < InitialRows
好的。这个写法很丑,但应该有效,我已经在 fiddle 上试过了。我觉得可以用窗口函数(windowing)来完成,不过我还得再想一想。
下面是目前这个不太优雅的版本:
-- Returns the set1 groups for which no single set2 group matches as many items
-- as the set1 group contains.
WITH set1_sizes AS (
    -- row count of each set1 group
    SELECT
        set1.grp,
        COUNT(*) AS cnt
    FROM set1
    GROUP BY set1.grp
),
pair_overlap AS (
    -- number of matching items for each (set1 group, set2 group) pair
    SELECT
        set1.grp AS grp1,
        set2.grp AS grp2,
        COUNT(set1.item) AS cnt
    FROM set1
    INNER JOIN set2
        ON set1.item = set2.item
    GROUP BY
        set1.grp,
        set2.grp
)
SELECT sz.grp
FROM set1_sizes AS sz
WHERE NOT EXISTS (
    -- a set2 group whose match count equals the full size of the set1 group
    SELECT 1
    FROM pair_overlap AS po
    WHERE po.grp1 = sz.grp
      AND po.cnt = sz.cnt
)
我认为这样可行。子查询针对每个 set1 组返回那些匹配了该组全部项目的 set2 组:做法是统计匹配项的数量,并将其与该 set1 组的项目数量进行比较。
-- Returns each #set1 group for which the correlated subquery finds no #set2 group
-- whose matched-item count equals the count taken over the outer group's items.
select s.grp from #set1 s
group by s.grp
having not exists (
-- One candidate row per #set2 group that shares items with the current #set1 group.
select s2.grp from #set2 s2 inner join #set1 s1 on s2.item = s1.item
where s1.grp = s.grp
group by s2.grp
-- NOTE(review): count(s.item) aggregates a column of the OUTER query; presumably it
-- yields the item count of the current outer group, which is then compared with the
-- number of items matched in this #set2 group -- confirm on the target server.
having count(s.item) = count(s2.item)
)
与 Robert Co 的答案基本相同。
我并不是从他的回答中得到这个思路的——这是我独立想出来的。
-- Start from every set1 group, then remove (EXCEPT) the groups that some single
-- set2 group covers completely; what remains needs more than one set2 group.
-- Uses the grp column from the setup script; "group" is a reserved word in T-SQL
-- and cannot be used as a bare identifier.
select set1.grp
from set1
except
select set1count.grp
from (
    -- number of items in each set1 group
    select set1.grp, count(*) as item_count
    from set1
    group by set1.grp
) as set1count
inner join (
    -- number of items each set1 group shares with each set2 group; grouping by
    -- both group columns keeps the set2 groups separate
    select set1.grp as grp1, count(*) as item_count
    from set1
    inner join set2
        on set2.item = set1.item
    group by set1.grp, set2.grp
) as set12count
    on set1count.grp = set12count.grp1                -- deliberately no set2 group in the join
    and set1count.item_count = set12count.item_count  -- all items of the set1 group are in one set2 group