自连接查询以查找重复记录
Self Join query to find duplicate records
我有一个大约有 32 列的表,想检查其中的重复行;但其中有两列的值是唯一的,因此严格来说每一行都不相同。
所以我需要一个自连接(self join),连接条件是除这两列之外的所有列都相等,以此找出重复记录。
要查找重复的行,可以使用如下查询(col1...col30 不包含每行始终唯一的那两列):
-- List each combination of the non-unique columns that occurs in more than
-- one row. The two always-unique columns are left out of both the SELECT list
-- and the GROUP BY, otherwise every row would form its own group.
SELECT col1, col2, ..., col30
FROM your_table
GROUP BY col1, col2, ..., col30
HAVING COUNT(*) > 1
编辑:从您的评论来看,您是想删除重复的行。假设表中有每行唯一的列 id1 和 id2(即主键),以及可能重复的列 col1..col4。下面的脚本演示了如何删除重复行:
-- Demo: delete duplicate rows from a table whose composite primary key
-- (id1, id2) is unique per row while col1..col4 may repeat.
CREATE TABLE #tt (
    id1  INT,
    id2  INT,
    col1 INT,
    col2 INT,
    col3 INT,
    col4 INT,
    PRIMARY KEY (id1, id2)
);

-- Sample data: three distinct (col1..col4) combinations, each present in
-- more than one row.
INSERT INTO #tt (id1, id2, col1, col2, col3, col4)
VALUES
    (1, 1, 1, 1, 1, 1),
    (1, 2, 1, 1, 1, 1),
    (1, 3, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1),
    (3, 1, 2, 2, 2, 2),
    (4, 1, 2, 2, 2, 2),
    (2, 2, 2, 2, 2, 2),
    (3, 3, 3, 3, 3, 3),
    (4, 2, 3, 3, 3, 3);

-- Number the rows inside each group of identical col1..col4 values, ordered
-- by the primary key. Row 1 of each group is kept; deleting through the CTE
-- removes every later row (rn > 1) from #tt itself.
WITH ranked AS (
    SELECT
        id1,
        id2,
        ROW_NUMBER() OVER (
            PARTITION BY col1, col2, col3, col4
            ORDER BY id1, id2
        ) AS rn
    FROM #tt
)
DELETE FROM ranked
WHERE rn > 1;

-- Show what is left: one row per distinct (col1..col4) combination.
SELECT id1, id2, col1, col2, col3, col4
FROM #tt;

DROP TABLE #tt;
您也可以使用如下查询:
-- Return every row that shares its non-unique column values with at least one
-- other row, i.e. all members of each duplicate group, originals included.
-- The subquery has to be correlated on the NON-unique columns: correlating on
-- the unique columns matches each row only with itself, so the count is
-- always 1 and the outer "1 < COUNT(*)" filter never passes.
-- NOTE: "=" does not match NULL to NULL, so rows with NULL in a compared
-- column are not reported by this form.
SELECT *
FROM yourTable t
WHERE 1 < (
    SELECT COUNT(*)
    FROM yourTable ti
    WHERE ti.nonUniqueCol1 = t.nonUniqueCol1
      AND ti.nonUniqueCol2 = t.nonUniqueCol2
      AND ...
      AND ti.nonUniqueColn = t.nonUniqueColn)
或者,如果只想筛选出重复的行,而不显示每组中的第一行(原始行),可以使用:
-- Number the rows within each group of identical non-unique column values,
-- ordered by the unique columns. The first row of a group (seq = 1) counts as
-- the original; only the later rows (seq > 1) are returned as duplicates.
WITH numbered AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY nonUniqueCol1, nonUniqueCol2, ..., nonUniqueColn
            ORDER BY uniqueCol1, uniqueCol2
        ) AS seq
    FROM yourTable
)
SELECT *
FROM numbered
WHERE seq > 1
我有一个大约有 32 列的表,想检查其中的重复行;但其中有两列的值是唯一的,因此严格来说每一行都不相同。
所以我需要一个自连接(self join),连接条件是除这两列之外的所有列都相等,以此找出重复记录。
要查找重复的行,可以使用如下查询(col1...col30 不包含每行始终唯一的那两列):
-- List each combination of the non-unique columns that occurs in more than
-- one row. The two always-unique columns are left out of both the SELECT list
-- and the GROUP BY, otherwise every row would form its own group.
SELECT col1, col2, ..., col30
FROM your_table
GROUP BY col1, col2, ..., col30
HAVING COUNT(*) > 1
编辑:从您的评论来看,您是想删除重复的行。假设表中有每行唯一的列 id1 和 id2(即主键),以及可能重复的列 col1..col4。下面的脚本演示了如何删除重复行:
-- Demo: delete duplicate rows from a table whose composite primary key
-- (id1, id2) is unique per row while col1..col4 may repeat.
CREATE TABLE #tt (
    id1  INT,
    id2  INT,
    col1 INT,
    col2 INT,
    col3 INT,
    col4 INT,
    PRIMARY KEY (id1, id2)
);

-- Sample data: three distinct (col1..col4) combinations, each present in
-- more than one row.
INSERT INTO #tt (id1, id2, col1, col2, col3, col4)
VALUES
    (1, 1, 1, 1, 1, 1),
    (1, 2, 1, 1, 1, 1),
    (1, 3, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1),
    (3, 1, 2, 2, 2, 2),
    (4, 1, 2, 2, 2, 2),
    (2, 2, 2, 2, 2, 2),
    (3, 3, 3, 3, 3, 3),
    (4, 2, 3, 3, 3, 3);

-- Number the rows inside each group of identical col1..col4 values, ordered
-- by the primary key. Row 1 of each group is kept; deleting through the CTE
-- removes every later row (rn > 1) from #tt itself.
WITH ranked AS (
    SELECT
        id1,
        id2,
        ROW_NUMBER() OVER (
            PARTITION BY col1, col2, col3, col4
            ORDER BY id1, id2
        ) AS rn
    FROM #tt
)
DELETE FROM ranked
WHERE rn > 1;

-- Show what is left: one row per distinct (col1..col4) combination.
SELECT id1, id2, col1, col2, col3, col4
FROM #tt;

DROP TABLE #tt;
您也可以使用如下查询:
-- Return every row that shares its non-unique column values with at least one
-- other row, i.e. all members of each duplicate group, originals included.
-- The subquery has to be correlated on the NON-unique columns: correlating on
-- the unique columns matches each row only with itself, so the count is
-- always 1 and the outer "1 < COUNT(*)" filter never passes.
-- NOTE: "=" does not match NULL to NULL, so rows with NULL in a compared
-- column are not reported by this form.
SELECT *
FROM yourTable t
WHERE 1 < (
    SELECT COUNT(*)
    FROM yourTable ti
    WHERE ti.nonUniqueCol1 = t.nonUniqueCol1
      AND ti.nonUniqueCol2 = t.nonUniqueCol2
      AND ...
      AND ti.nonUniqueColn = t.nonUniqueColn)
或者,如果只想筛选出重复的行,而不显示每组中的第一行(原始行),可以使用:
-- Number the rows within each group of identical non-unique column values,
-- ordered by the unique columns. The first row of a group (seq = 1) counts as
-- the original; only the later rows (seq > 1) are returned as duplicates.
WITH numbered AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY nonUniqueCol1, nonUniqueCol2, ..., nonUniqueColn
            ORDER BY uniqueCol1, uniqueCol2
        ) AS seq
    FROM yourTable
)
SELECT *
FROM numbered
WHERE seq > 1