SQL - 在大型表中查找完全匹配的记录组
SQL - find exact group of records in large table
我有以下数据:
ID --- GRP_ID --- REC_VAL
1 --- 1 --- A
2 --- 2 --- A
3 --- 2 --- B
4 --- 3 --- A
5 --- 3 --- B
6 --- 3 --- C
7 --- 4 --- A
8 --- 4 --- B
9 --- 4 --- C
10 --- 5 --- A
11 --- 5 --- B
12 --- 5 --- E
有没有办法找出包含完全相同值集合的记录组的 ID?(在本例中,只有 grp_id 3 和 4 的值完全相同)
第二个问题:
当我给定一组值时,有没有高效的方法找到与之完全匹配的 grp_id?我的解决方案不够快,因为存放分组的表有超过 600 万条记录:
-- Sample fixture for tmp_grp. The real table is large: up to 6 million records.
CREATE TABLE tmp_grp (
    id      NUMBER,
    grp_id  NUMBER,
    rec_val VARCHAR2(10)
);

-- Load the twelve sample rows with one statement; each SELECT ... FROM dual
-- supplies one (id, grp_id, rec_val) row.
INSERT INTO tmp_grp (id, grp_id, rec_val)
SELECT  1, 1, 'A' FROM dual UNION ALL
SELECT  2, 2, 'A' FROM dual UNION ALL
SELECT  3, 2, 'B' FROM dual UNION ALL
SELECT  4, 3, 'A' FROM dual UNION ALL
SELECT  5, 3, 'B' FROM dual UNION ALL
SELECT  6, 3, 'C' FROM dual UNION ALL
SELECT  7, 4, 'A' FROM dual UNION ALL
SELECT  8, 4, 'B' FROM dual UNION ALL
SELECT  9, 4, 'C' FROM dual UNION ALL
SELECT 10, 5, 'A' FROM dual UNION ALL
SELECT 11, 5, 'B' FROM dual UNION ALL
SELECT 12, 5, 'E' FROM dual;

COMMIT;
--
-- Return every grp_id whose rows carry exactly the values listed in datrec:
-- nothing missing, nothing extra, nothing repeated.
--
-- datrec stands in for the requested value set (here 'A' and 'B'); it is
-- expected to list each requested value once.
WITH datrec AS (
    SELECT 'A' AS rec FROM dual UNION ALL
    SELECT 'B' AS rec FROM dual
)
SELECT t.grp_id
FROM tmp_grp t
-- LEFT JOIN keeps rows whose rec_val was not requested (d.rec is NULL for
-- them), so the group's row count and its matched values are both available
-- in one aggregation. tmp_grp is referenced once instead of being re-counted
-- per group by a correlated subquery.
LEFT JOIN datrec d
    ON d.rec = t.rec_val
-- Rows without a group id belong to no group and are never reported.
WHERE t.grp_id IS NOT NULL
GROUP BY t.grp_id
HAVING
    -- The group has as many rows as there are requested values ...
    COUNT(*) = (SELECT COUNT(*) FROM datrec)
    -- ... and each of those rows carries a different requested value.
    -- Counting rows alone would accept a group such as {A, A} for the
    -- request {A, B}; COUNT(DISTINCT ...) rejects it.
    AND COUNT(DISTINCT d.rec) = COUNT(*);
--
这个问题与另一个问题(原文链接缺失)类似,但那个主题中的查询返回的是所有包含给定值集合的组(值的个数以及 datrec 的 rec 列中的值将由另一个查询提供)。我只需要返回完全匹配的组。
更新 - 在表中添加了数据,以便更好地说明问题
另请参阅相关问题:How to compare groups of tuples in sql
下面是一种避免将基表与其自身连接的方法。如果每个 grp_id 有若干个(很多?)可能的 rec_val 值,这种方法会更高效。如果数据中的某处已经存有去重后的 grp_id 列表,它还可以更快;这里我是即时生成该列表的。
-- List every pair of groups (grp_id_1 < grp_id_2) that hold exactly the same
-- set of rec_val values, without joining tmp_grp to itself.
with gid ( grp_id ) as (
    -- Distinct group ids, built on the fly from tmp_grp.
    select distinct grp_id from tmp_grp
),
prep ( grp_id_1, grp_id_2, src_grp_id, rec_val ) as (
    -- For each ordered pair of groups, collect the values of the lower group ...
    select t.grp_id, g.grp_id, t.grp_id, t.rec_val
      from tmp_grp t inner join gid g on t.grp_id < g.grp_id
    union all
    -- ... and the values of the higher group. src_grp_id records which of
    -- the two groups each value actually came from.
    select g.grp_id, t.grp_id, t.grp_id, t.rec_val
      from gid g inner join tmp_grp t on g.grp_id < t.grp_id
),
counts ( grp_id_1, grp_id_2, cnt ) as (
    -- cnt = number of groups in the pair (1 or 2) that contain this value.
    -- Counting distinct source groups rather than rows keeps a value that is
    -- repeated inside one group from looking as if both groups had it.
    select grp_id_1, grp_id_2, count(distinct src_grp_id)
      from prep
     group by grp_id_1, grp_id_2, rec_val
)
select grp_id_1, grp_id_2
  from counts
 group by grp_id_1, grp_id_2
-- Every value seen in the pair must be present in both groups.
having min(cnt) = 2
;
我有以下数据:
ID --- GRP_ID --- REC_VAL
1 --- 1 --- A
2 --- 2 --- A
3 --- 2 --- B
4 --- 3 --- A
5 --- 3 --- B
6 --- 3 --- C
7 --- 4 --- A
8 --- 4 --- B
9 --- 4 --- C
10 --- 5 --- A
11 --- 5 --- B
12 --- 5 --- E
有没有办法找出包含完全相同值集合的记录组的 ID?(在本例中,只有 grp_id 3 和 4 的值完全相同)
第二个问题:
当我给定一组值时,有没有高效的方法找到与之完全匹配的 grp_id?我的解决方案不够快,因为存放分组的表有超过 600 万条记录:
-- Sample fixture for tmp_grp. The real table is large: up to 6 million records.
CREATE TABLE tmp_grp (
    id      NUMBER,
    grp_id  NUMBER,
    rec_val VARCHAR2(10)
);

-- Load the twelve sample rows with one statement; each SELECT ... FROM dual
-- supplies one (id, grp_id, rec_val) row.
INSERT INTO tmp_grp (id, grp_id, rec_val)
SELECT  1, 1, 'A' FROM dual UNION ALL
SELECT  2, 2, 'A' FROM dual UNION ALL
SELECT  3, 2, 'B' FROM dual UNION ALL
SELECT  4, 3, 'A' FROM dual UNION ALL
SELECT  5, 3, 'B' FROM dual UNION ALL
SELECT  6, 3, 'C' FROM dual UNION ALL
SELECT  7, 4, 'A' FROM dual UNION ALL
SELECT  8, 4, 'B' FROM dual UNION ALL
SELECT  9, 4, 'C' FROM dual UNION ALL
SELECT 10, 5, 'A' FROM dual UNION ALL
SELECT 11, 5, 'B' FROM dual UNION ALL
SELECT 12, 5, 'E' FROM dual;

COMMIT;
--
-- Return every grp_id whose rows carry exactly the values listed in datrec:
-- nothing missing, nothing extra, nothing repeated.
--
-- datrec stands in for the requested value set (here 'A' and 'B'); it is
-- expected to list each requested value once.
WITH datrec AS (
    SELECT 'A' AS rec FROM dual UNION ALL
    SELECT 'B' AS rec FROM dual
)
SELECT t.grp_id
FROM tmp_grp t
-- LEFT JOIN keeps rows whose rec_val was not requested (d.rec is NULL for
-- them), so the group's row count and its matched values are both available
-- in one aggregation. tmp_grp is referenced once instead of being re-counted
-- per group by a correlated subquery.
LEFT JOIN datrec d
    ON d.rec = t.rec_val
-- Rows without a group id belong to no group and are never reported.
WHERE t.grp_id IS NOT NULL
GROUP BY t.grp_id
HAVING
    -- The group has as many rows as there are requested values ...
    COUNT(*) = (SELECT COUNT(*) FROM datrec)
    -- ... and each of those rows carries a different requested value.
    -- Counting rows alone would accept a group such as {A, A} for the
    -- request {A, B}; COUNT(DISTINCT ...) rejects it.
    AND COUNT(DISTINCT d.rec) = COUNT(*);
--
这个问题与另一个问题(原文链接缺失)类似,但那个主题中的查询返回的是所有包含给定值集合的组,而我只需要返回完全匹配的组。
更新 - 在表中添加了数据,以便更好地说明问题
另请参阅相关问题:How to compare groups of tuples in sql
下面是一种避免将基表与其自身连接的方法。如果每个 grp_id 有若干个(很多?)可能的 rec_val 值,这种方法会更高效。如果数据中的某处已经存有去重后的 grp_id 列表,它还可以更快;这里我是即时生成该列表的。
-- List every pair of groups (grp_id_1 < grp_id_2) that hold exactly the same
-- set of rec_val values, without joining tmp_grp to itself.
with gid ( grp_id ) as (
    -- Distinct group ids, built on the fly from tmp_grp.
    select distinct grp_id from tmp_grp
),
prep ( grp_id_1, grp_id_2, src_grp_id, rec_val ) as (
    -- For each ordered pair of groups, collect the values of the lower group ...
    select t.grp_id, g.grp_id, t.grp_id, t.rec_val
      from tmp_grp t inner join gid g on t.grp_id < g.grp_id
    union all
    -- ... and the values of the higher group. src_grp_id records which of
    -- the two groups each value actually came from.
    select g.grp_id, t.grp_id, t.grp_id, t.rec_val
      from gid g inner join tmp_grp t on g.grp_id < t.grp_id
),
counts ( grp_id_1, grp_id_2, cnt ) as (
    -- cnt = number of groups in the pair (1 or 2) that contain this value.
    -- Counting distinct source groups rather than rows keeps a value that is
    -- repeated inside one group from looking as if both groups had it.
    select grp_id_1, grp_id_2, count(distinct src_grp_id)
      from prep
     group by grp_id_1, grp_id_2, rec_val
)
select grp_id_1, grp_id_2
  from counts
 group by grp_id_1, grp_id_2
-- Every value seen in the pair must be present in both groups.
having min(cnt) = 2
;