SQL 从两个具有交集和并集但也具有其他独特属性的表中查询
SQL Query from two tables with intersection and union but also with other unique properties
我有两个table
Table 1
Brand | Price| Shape |weight |Color |URL
--------------------------------
Philips| 13 | Square| 12lbs |Blue |example.com/123
Philips| 4 | Round | 17 lbs |Yellow|example.com/1567
Table 2
Brand | Price| Shape |weight |Color |URL
--------------------------------
Philips| 12 | Square | 12lbs |Blue |example.com/456
Philips| 4 | Round | 16 lbs|Yellow|example.com/17987
GE | 4 | Square | 17 lbs|red |example.com/17234234
我想编写一个 SQL 查询,从这两个表中选出产品:对属性相同的产品取最便宜的价格,并返回所有属性和 URL。我尝试了联接(join):
-- Attempted join: pairs rows of Table1 and table2 that agree on brand, shape,
-- weight and color, and reports the lower of the two prices.
-- The trailing * additionally returns every column of both tables.
SELECT
    CASE
        WHEN t1.price < t2.price THEN t1.price
        ELSE t2.price
    END AS price,
    *
FROM Table1 AS t1
INNER JOIN table2 AS t2
    ON  t1.Brand  = t2.Brand
    AND t1.Shape  = t2.Shape
    AND t1.weight = t2.weight
    AND t1.color  = t2.color
但是这会返回重复的结果。
我也试过并集(UNION)和交集(INTERSECT),但这样得不到我想要的 URL:
-- Attempted query: chains four SELECTs over table1 / table2 with
-- UNION, EXCEPT and INTERSECT, each projecting Brand, Shape, weight, color, URL.
-- NOTE(review): every filter compares against the string literal 'NULL' / 'null',
-- not SQL NULL; if the goal is to skip missing values, IS NOT NULL is the usual
-- form -- confirm the intent.
-- NOTE(review): no parentheses are used; in standard SQL INTERSECT binds tighter
-- than UNION / EXCEPT -- confirm the intended grouping.
SELECT
Brand , Shape, weight, color, URL
FROM
table1
WHERE
Price !='NULL'
-- NOTE(review): BulbShape, Wattage and Lumens_Initial are not among the columns
-- shown for these tables (Brand, Price, Shape, weight, Color, URL) -- presumably
-- names from the real schema; verify.
AND BulbShape != 'null'
AND Wattage != 'null'
AND Lumens_Initial != 'null'
UNION
SELECT
Brand, Shape, weight, color, URL
FROM
table2
WHERE
Price != 'NULL'
AND Shape != 'null'
AND weight != 'null'
AND color != 'null'
EXCEPT
SELECT
Brand, Shape, weight, color, URL
FROM
table1
WHERE
Price != 'NULL'
AND Shape != 'null'
AND weight != 'null'
AND color != 'null'
INTERSECT
SELECT
Brand, Shape, weight, color, URL
FROM
table2
WHERE
Price != 'NULL'
AND Shape != 'null'
-- NOTE(review): Wattage here where the parallel branches filter on weight -- verify.
AND Wattage != 'null'
AND color != 'null'
我没有任何主键,因为它只是从网络上收集的数据。
我该如何编写查询,以获取不重复的数据,其中包含表中的所有列以及最低价格?
预期结果应该类似于
Brand | Price| Shape |weight |Color |URL
--------------------------------------------------------------
Philips| 12 | Square | 12 lbs |Blue |example.com/123
Philips| 4 | Round | 17 lbs |Yellow |example.com/1567
Philips| 4 | Round | 16 lbs |Yellow |example.com/17987
GE | 4 | Square | 17 lbs |red |example.com/17234234
在第一行中,我只取了最低价格,其余列与第一个表相同。第二行的属性不同,所以两个表中的行我都保留了。最后一行只存在于第二个表中,所以我保留了那一行。
-- joindemo: self-contained demo. Loads the sample rows into two temp tables,
-- merges them, and returns, for every (brand, shape, weight, color) combination,
-- the row(s) carrying the lowest price together with their url.
-- Takes no parameters; returns one result set:
--   brand, minprice, shape, weight, color, url
-- Rows tied for the lowest price within a combination are all returned.
CREATE PROCEDURE joindemo
AS
BEGIN
    CREATE TABLE #table1
    (
        brand  varchar(50),
        price  int,
        shape  varchar(50),
        weight varchar(50),
        color  varchar(50),
        url    varchar(100)
    );

    CREATE TABLE #table2
    (
        brand  varchar(50),
        price  int,
        shape  varchar(50),
        weight varchar(50),
        color  varchar(50),
        url    varchar(100)
    );

    -- Explicit column lists so the inserts do not depend on column order.
    INSERT INTO #table1 (brand, price, shape, weight, color, url)
    VALUES ('Philips', 13, 'Square', '12lbs', 'Blue', 'example.com/123');
    INSERT INTO #table1 (brand, price, shape, weight, color, url)
    VALUES ('Philips', 4, 'Round', '17lbs', 'Yellow', 'example.com/1567');

    INSERT INTO #table2 (brand, price, shape, weight, color, url)
    VALUES ('Philips', 12, 'Square', '12lbs', 'Blue', 'example.com/456');
    INSERT INTO #table2 (brand, price, shape, weight, color, url)
    VALUES ('Philips', 4, 'Round', '16lbs', 'Yellow', 'example.com/17987');
    INSERT INTO #table2 (brand, price, shape, weight, color, url)
    VALUES ('GE', 4, 'Square', '17lbs', 'Red', 'example.com/17234234');

    CREATE TABLE #jointable
    (
        brand  varchar(50),
        price  int,
        shape  varchar(50),
        weight varchar(50),
        color  varchar(50),
        url    varchar(100)
    );

    -- UNION (not UNION ALL) on purpose: a row that is identical in both tables,
    -- url included, is stored once so it cannot appear twice in the result.
    INSERT INTO #jointable (brand, price, shape, weight, color, url)
    SELECT brand, price, shape, weight, color, url
    FROM #table1
    UNION
    SELECT brand, price, shape, weight, color, url
    FROM #table2;

    -- mp holds the lowest price per attribute combination; joining it back to
    -- #jointable on those attributes plus the price recovers the matching url(s).
    SELECT
        j.brand,
        mp.minprice,
        j.shape,
        j.weight,
        j.color,
        j.url
    FROM
    (
        SELECT brand, MIN(price) AS minprice, shape, weight, color
        FROM #jointable
        GROUP BY brand, shape, weight, color
    ) AS mp
    INNER JOIN #jointable AS j
        ON  mp.brand    = j.brand
        AND mp.minprice = j.price
        AND mp.color    = j.color
        AND mp.shape    = j.shape
        AND mp.weight   = j.weight;

    DROP TABLE #table1;
    DROP TABLE #table2;
    DROP TABLE #jointable;
END
--exec joindemo;
请注意,您预期的输出是错误的。第一行的url应该是example.com/456。如果两个价格相同,您还需要决定要做什么!因为你没有说明,我没办法再猜你是想展示两个还是一个!
这是一个经典的 top-n-per-group 问题。
示例数据
-- Sample data: two table variables with the same six columns, filled with the
-- rows from the question (2 rows in @table1, 3 rows in @table2).
DECLARE @table1 TABLE (
    brand  varchar(50),
    price  int,
    shape  varchar(50),
    weight varchar(50),
    color  varchar(50),
    url    varchar(100)
);

DECLARE @table2 TABLE (
    brand  varchar(50),
    price  int,
    shape  varchar(50),
    weight varchar(50),
    color  varchar(50),
    url    varchar(100)
);

INSERT INTO @table1
    (brand, price, shape, weight, color, url)
VALUES
    ('Philips', 13, 'Square', '12lbs', 'Blue',   'example.com/123'),
    ('Philips', 4,  'Round',  '17lbs', 'Yellow', 'example.com/1567');

INSERT INTO @table2
    (brand, price, shape, weight, color, url)
VALUES
    ('Philips', 12, 'Square', '12lbs', 'Blue',   'example.com/456'),
    ('Philips', 4,  'Round',  '16lbs', 'Yellow', 'example.com/17987'),
    ('GE',      4,  'Square', '17lbs', 'Red',    'example.com/17234234');
查询
首先用 UNION ALL 将两个表合并为 CTE_Tables。然后使用 ROW_NUMBER 按所有属性分区、按价格排序,为每一行生成行号(CTE_RN)。最后只选取每组的第一行。
-- Cheapest row per product across @table1 and @table2.
-- Returns brand, price, shape, weight, color, url: exactly one row for each
-- distinct (brand, shape, weight, color) combination.
WITH
-- All rows of both tables; UNION ALL because the ranking below already keeps
-- a single row per combination.
CTE_Tables
AS
(
    SELECT brand, price, shape, weight, color, url
    FROM @table1
    UNION ALL
    SELECT brand, price, shape, weight, color, url
    FROM @table2
)
-- Number the rows of each combination from cheapest to most expensive.
,CTE_RN
AS
(
    SELECT
        brand, price, shape, weight, color, url
        ,ROW_NUMBER() OVER (
            PARTITION BY brand, shape, weight, color
            -- url breaks price ties, so the same row wins on every run
            ORDER BY price, url
        ) AS rn
    FROM CTE_Tables
)
SELECT brand, price, shape, weight, color, url
FROM CTE_RN
WHERE rn = 1
ORDER BY brand DESC, price DESC, shape DESC, weight DESC, color, url;
结果
+---------+-------+--------+--------+--------+----------------------+
| brand | price | shape | weight | color | url |
+---------+-------+--------+--------+--------+----------------------+
| Philips | 12 | Square | 12lbs | Blue | example.com/456 |
| Philips | 4 | Round | 17lbs | Yellow | example.com/1567 |
| Philips | 4 | Round | 16lbs | Yellow | example.com/17987 |
| GE | 4 | Square | 17lbs | Red | example.com/17234234 |
+---------+-------+--------+--------+--------+----------------------+
我有两个table
Table 1
Brand | Price| Shape |weight |Color |URL
--------------------------------
Philips| 13 | Square| 12lbs |Blue |example.com/123
Philips| 4 | Round | 17 lbs |Yellow|example.com/1567
Table 2
Brand | Price| Shape |weight |Color |URL
--------------------------------
Philips| 12 | Square | 12lbs |Blue |example.com/456
Philips| 4 | Round | 16 lbs|Yellow|example.com/17987
GE | 4 | Square | 17 lbs|red |example.com/17234234
我想编写一个 SQL 查询,从这两个表中选出产品:对属性相同的产品取最便宜的价格,并返回所有属性和 URL。我尝试了联接(join):
-- Attempted join: pairs rows of Table1 and table2 that agree on brand, shape,
-- weight and color, and reports the lower of the two prices.
-- The trailing * additionally returns every column of both tables.
SELECT
    CASE
        WHEN t1.price < t2.price THEN t1.price
        ELSE t2.price
    END AS price,
    *
FROM Table1 AS t1
INNER JOIN table2 AS t2
    ON  t1.Brand  = t2.Brand
    AND t1.Shape  = t2.Shape
    AND t1.weight = t2.weight
    AND t1.color  = t2.color
但是这会返回重复的结果。
我也试过并集(UNION)和交集(INTERSECT),但这样得不到我想要的 URL:
-- Attempted query: chains four SELECTs over table1 / table2 with
-- UNION, EXCEPT and INTERSECT, each projecting Brand, Shape, weight, color, URL.
-- NOTE(review): every filter compares against the string literal 'NULL' / 'null',
-- not SQL NULL; if the goal is to skip missing values, IS NOT NULL is the usual
-- form -- confirm the intent.
-- NOTE(review): no parentheses are used; in standard SQL INTERSECT binds tighter
-- than UNION / EXCEPT -- confirm the intended grouping.
SELECT
Brand , Shape, weight, color, URL
FROM
table1
WHERE
Price !='NULL'
-- NOTE(review): BulbShape, Wattage and Lumens_Initial are not among the columns
-- shown for these tables (Brand, Price, Shape, weight, Color, URL) -- presumably
-- names from the real schema; verify.
AND BulbShape != 'null'
AND Wattage != 'null'
AND Lumens_Initial != 'null'
UNION
SELECT
Brand, Shape, weight, color, URL
FROM
table2
WHERE
Price != 'NULL'
AND Shape != 'null'
AND weight != 'null'
AND color != 'null'
EXCEPT
SELECT
Brand, Shape, weight, color, URL
FROM
table1
WHERE
Price != 'NULL'
AND Shape != 'null'
AND weight != 'null'
AND color != 'null'
INTERSECT
SELECT
Brand, Shape, weight, color, URL
FROM
table2
WHERE
Price != 'NULL'
AND Shape != 'null'
-- NOTE(review): Wattage here where the parallel branches filter on weight -- verify.
AND Wattage != 'null'
AND color != 'null'
我没有任何主键,因为它只是从网络上收集的数据。
我该如何编写查询,以获取不重复的数据,其中包含表中的所有列以及最低价格?
预期结果应该类似于
Brand | Price| Shape |weight |Color |URL
--------------------------------------------------------------
Philips| 12 | Square | 12 lbs |Blue |example.com/123
Philips| 4 | Round | 17 lbs |Yellow |example.com/1567
Philips| 4 | Round | 16 lbs |Yellow |example.com/17987
GE | 4 | Square | 17 lbs |red |example.com/17234234
在第一行中,我只取了最低价格,其余列与第一个表相同。第二行的属性不同,所以两个表中的行我都保留了。最后一行只存在于第二个表中,所以我保留了那一行。
-- joindemo: self-contained demo. Loads the sample rows into two temp tables,
-- merges them, and returns, for every (brand, shape, weight, color) combination,
-- the row(s) carrying the lowest price together with their url.
-- Takes no parameters; returns one result set:
--   brand, minprice, shape, weight, color, url
-- Rows tied for the lowest price within a combination are all returned.
CREATE PROCEDURE joindemo
AS
BEGIN
    CREATE TABLE #table1
    (
        brand  varchar(50),
        price  int,
        shape  varchar(50),
        weight varchar(50),
        color  varchar(50),
        url    varchar(100)
    );

    CREATE TABLE #table2
    (
        brand  varchar(50),
        price  int,
        shape  varchar(50),
        weight varchar(50),
        color  varchar(50),
        url    varchar(100)
    );

    -- Explicit column lists so the inserts do not depend on column order.
    INSERT INTO #table1 (brand, price, shape, weight, color, url)
    VALUES ('Philips', 13, 'Square', '12lbs', 'Blue', 'example.com/123');
    INSERT INTO #table1 (brand, price, shape, weight, color, url)
    VALUES ('Philips', 4, 'Round', '17lbs', 'Yellow', 'example.com/1567');

    INSERT INTO #table2 (brand, price, shape, weight, color, url)
    VALUES ('Philips', 12, 'Square', '12lbs', 'Blue', 'example.com/456');
    INSERT INTO #table2 (brand, price, shape, weight, color, url)
    VALUES ('Philips', 4, 'Round', '16lbs', 'Yellow', 'example.com/17987');
    INSERT INTO #table2 (brand, price, shape, weight, color, url)
    VALUES ('GE', 4, 'Square', '17lbs', 'Red', 'example.com/17234234');

    CREATE TABLE #jointable
    (
        brand  varchar(50),
        price  int,
        shape  varchar(50),
        weight varchar(50),
        color  varchar(50),
        url    varchar(100)
    );

    -- UNION (not UNION ALL) on purpose: a row that is identical in both tables,
    -- url included, is stored once so it cannot appear twice in the result.
    INSERT INTO #jointable (brand, price, shape, weight, color, url)
    SELECT brand, price, shape, weight, color, url
    FROM #table1
    UNION
    SELECT brand, price, shape, weight, color, url
    FROM #table2;

    -- mp holds the lowest price per attribute combination; joining it back to
    -- #jointable on those attributes plus the price recovers the matching url(s).
    SELECT
        j.brand,
        mp.minprice,
        j.shape,
        j.weight,
        j.color,
        j.url
    FROM
    (
        SELECT brand, MIN(price) AS minprice, shape, weight, color
        FROM #jointable
        GROUP BY brand, shape, weight, color
    ) AS mp
    INNER JOIN #jointable AS j
        ON  mp.brand    = j.brand
        AND mp.minprice = j.price
        AND mp.color    = j.color
        AND mp.shape    = j.shape
        AND mp.weight   = j.weight;

    DROP TABLE #table1;
    DROP TABLE #table2;
    DROP TABLE #jointable;
END
--exec joindemo;
请注意,您预期的输出是错误的。第一行的url应该是example.com/456。如果两个价格相同,您还需要决定要做什么!因为你没有说明,我没办法再猜你是想展示两个还是一个!
这是一个经典的 top-n-per-group 问题。
示例数据
-- Sample data: two table variables with the same six columns, filled with the
-- rows from the question (2 rows in @table1, 3 rows in @table2).
DECLARE @table1 TABLE (
    brand  varchar(50),
    price  int,
    shape  varchar(50),
    weight varchar(50),
    color  varchar(50),
    url    varchar(100)
);

DECLARE @table2 TABLE (
    brand  varchar(50),
    price  int,
    shape  varchar(50),
    weight varchar(50),
    color  varchar(50),
    url    varchar(100)
);

INSERT INTO @table1
    (brand, price, shape, weight, color, url)
VALUES
    ('Philips', 13, 'Square', '12lbs', 'Blue',   'example.com/123'),
    ('Philips', 4,  'Round',  '17lbs', 'Yellow', 'example.com/1567');

INSERT INTO @table2
    (brand, price, shape, weight, color, url)
VALUES
    ('Philips', 12, 'Square', '12lbs', 'Blue',   'example.com/456'),
    ('Philips', 4,  'Round',  '16lbs', 'Yellow', 'example.com/17987'),
    ('GE',      4,  'Square', '17lbs', 'Red',    'example.com/17234234');
查询
首先用 UNION ALL 将两个表合并为 CTE_Tables。然后使用 ROW_NUMBER 按所有属性分区、按价格排序,为每一行生成行号(CTE_RN)。最后只选取每组的第一行。
-- Cheapest row per product across @table1 and @table2.
-- Returns brand, price, shape, weight, color, url: exactly one row for each
-- distinct (brand, shape, weight, color) combination.
WITH
-- All rows of both tables; UNION ALL because the ranking below already keeps
-- a single row per combination.
CTE_Tables
AS
(
    SELECT brand, price, shape, weight, color, url
    FROM @table1
    UNION ALL
    SELECT brand, price, shape, weight, color, url
    FROM @table2
)
-- Number the rows of each combination from cheapest to most expensive.
,CTE_RN
AS
(
    SELECT
        brand, price, shape, weight, color, url
        ,ROW_NUMBER() OVER (
            PARTITION BY brand, shape, weight, color
            -- url breaks price ties, so the same row wins on every run
            ORDER BY price, url
        ) AS rn
    FROM CTE_Tables
)
SELECT brand, price, shape, weight, color, url
FROM CTE_RN
WHERE rn = 1
ORDER BY brand DESC, price DESC, shape DESC, weight DESC, color, url;
结果
+---------+-------+--------+--------+--------+----------------------+
| brand | price | shape | weight | color | url |
+---------+-------+--------+--------+--------+----------------------+
| Philips | 12 | Square | 12lbs | Blue | example.com/456 |
| Philips | 4 | Round | 17lbs | Yellow | example.com/1567 |
| Philips | 4 | Round | 16lbs | Yellow | example.com/17987 |
| GE | 4 | Square | 17lbs | Red | example.com/17234234 |
+---------+-------+--------+--------+--------+----------------------+