寻找最短路径最多分离十度
finding shortest path up to ten degrees of separation
我在SQL中有以下三个表:
select * from movie limit 2;
id | title | year | content_rating | duration | lang | country | gross | budget | director_id
------+----------------------------+------+----------------+----------+------------+----------------------+----------+----------+-------------
407 | 102 Dalmatians | 2000 | G | 100 | English | USA | 66941559 | 85000000 | 2174
3699 | 10 Cloverfield Lane | 2016 | PG-13 | 104 | English | USA | 71897215 | 15000000 | 1327
(2 rows)
select * from actor limit 3;
id | name | facebook_likes
------+----------------------+----------------
408 | Christian Bale | 23000
1430 | Donna Murphy | 553
66 | Robert Downey Jr. | 21000
(3 rows)
select * from acting limit 3;
movie_id | actor_id
----------+----------
407 | 2024
3699 | 1841
3016 | 11
(3 rows)
给定两个演员 a1
和 a2
,我想找到 最短 路径 a1
和 a2
。
例如,假设 a1 = 'Tom Cruise'
和 a2 = 'Robert Downey Jr'
。
输出应该是
Tom Cruise was in Days of Thunder with Robert Duvall
-> Robert Duvall was in Lucky You with Robert Downey Jr.
在这种情况下,Tom Cruise
与 Robert Downey Jr
相距 2 度,Robert Durvall
连接它们。我最多想输出 10 度 ,然后忽略任何连接。
我尝试使用递归 CTE 实施解决方案 ,但我认为我没有正确应用它。感谢帮助,提前致谢:)
尝试查询:
with recursive cte as (
select actor.name, movie.title, 1 as level from movie
left join acting on acting.movie_id = movie.id
left join actor on actor.id = acting.actor_id
where actor.name = 'Tom Cruise'
union
select actor.name, movie.title, level+1 from movie
left join acting on acting.movie_id = movie.id
left join actor on actor.id = acting.actor_id
inner join cte on cte.name = actor.name
where cte.name = actor.name and cte.level < 10
)
select * from cte
我不确定查询中的第二个 select 会是什么 return 但这里有一种获取参与者之间分离度的方法:
假设我们有 table 个演员 ID,Origin。为了获得与我们 table 中的其中一位演员在同一部电影中扮演过的所有演员,我们需要从 Origin 开始,加入 Acting 然后加入 Movie 以获得我们起源的所有电影演员已经上场,然后再次加入 Acting 和 Actor table 以获得我们想要的。注意 Acting table 出现了两次。如果我们将其应用于递归 CTE 和您的问题,并注意到 Origin table 在您的示例中将是 Cte,我们将得到以下结果:
WITH RECURSIVE cte(id, distance) AS (
SELECT actor.id, 0
FROM actor
WHERE actor.name = 'Tom Cruise'
UNION
SELECT DISTINCT actor.id, cte.distance + 1
FROM cte
JOIN acting AS acting1 ON (cte.id = acting1.actor_id)
JOIN movie ON (acting1.movie_id = movie.id)
JOIN acting AS acting2 ON (movie.id = acting2.movie_id)
JOIN actor ON (acting2.actor_id = actor.id)
WHERE cte.id <> actor.id AND cte.distance + 1 <= 10
)
在此之后,cte table 将包含类型为 (id, dist) 的元组,这意味着存在从汤姆克鲁斯到具有此 id 且距离为 dist 的演员的路径。
DISTINCT 是为了提高效率。我们的 Cte table 中会有很多错误对(第二个值大于真实距离),尤其是在 actor 图密集的情况下,但正确的元组 将 在 Cte table。正确的元组是指一个元组(演员,距离),这样的距离是起始演员(例如汤姆克鲁斯)和这个演员之间的最短路径。
编辑: 不好意思,UNION 已经这样做了,所以重复不需要 DISTINCT。
为了得到那个距离,我们添加一个 select 和一个 group by 子句:
WITH RECURSIVE cte(id, distance) AS (
SELECT actor.id, 0
FROM actor
WHERE actor.name = 'Tom Cruise'
UNION
SELECT actor.id, cte.distance + 1
FROM cte
JOIN acting AS acting1 ON (cte.id = acting1.actor_id)
JOIN movie ON (acting1.movie_id = movie.id)
JOIN acting AS acting2 ON (movie.id = acting2.movie_id)
JOIN actor ON (acting2.actor_id = actor.id)
WHERE cte.id <> actor.id AND cte.distance + 1 <= 10
)
SELECT id, MIN(distance) AS distance
FROM cte
GROUP BY id
ORDER BY 2 ASC;
如果您想查看给定的第二个演员的结果,比如小罗伯特唐尼,那么这将为您提供有关分离度的答案:
WITH RECURSIVE cte(id, distance) AS (
SELECT actor.id, 0
FROM actor
WHERE actor.name = 'Tom Cruise'
UNION
SELECT actor.id, cte.distance + 1
FROM cte
JOIN acting AS acting1 ON (cte.id = acting1.actor_id)
JOIN movie ON (acting1.movie_id = movie.id)
JOIN acting AS acting2 ON (movie.id = acting2.movie_id)
JOIN actor ON (acting2.actor_id = actor.id)
WHERE cte.id <> actor.id AND cte.distance + 1 <= 10
), distance_table (id, distance) AS (
SELECT id, MIN(distance) AS distance
FROM cte
GROUP BY id
)
SELECT 'Tom Cruise and ' || actor.name || ' are separated by ' ||
COALESCE(TO_CHAR(distance_table.distance, '999999'), 'more than 10') || ' degrees of separation'
FROM actor
LEFT JOIN distance_table ON (actor.id = distance_table.id)
WHERE actor.name = 'Robert Downey Jr';
尽管我认为您一般不会想直接从数据库中计算此类信息,但如果您想要一条消息告诉演员之间的路径,就像您提供的那样(汤姆克鲁斯是在 Days of Thunder with Robert Duvall -> Robert Duvall 在 Lucky You 和 Robert Downey Jr.) 中,那么这样的事情可以 return 那:
WITH RECURSIVE cte(id, name, distance, message) AS (
SELECT actor.id, actor.name, 0, ''
FROM actor
WHERE actor.name = 'Tom Cruise'
UNION
SELECT actor.id, actor.name, cte.distance + 1,
cte.message || '> ' || cte.name || ' was in ' ||
movie.title || ' with ' || actor.name || ' '
FROM cte
JOIN acting AS acting1 ON (cte.id = acting1.actor_id)
JOIN movie ON (acting1.movie_id = movie.id)
JOIN acting AS acting2 ON (movie.id = acting2.movie_id)
JOIN actor ON (acting2.actor_id = actor.id)
WHERE cte.id <> actor.id AND cte.distance + 1 <= 10
), distance_table (id, distance) AS (
SELECT id, MIN(distance) AS distance
FROM cte
GROUP BY id
)
SELECT id, name, message, distance
FROM cte
WHERE (id, distance) IN (SELECT * FROM distance_table)
ORDER BY distance;
这是一个尝试(没有 CTE)。我碰巧有一份包含 4175 个美国城邦对的列表。 (想想 state==movie 和 city==actor。)
这是来自 table us
的设置:
SET NAMES utf8 COLLATE utf8_unicode_ci;
DROP TABLE IF EXISTS p_mapping; -- state-city pairs (movie-actor)
CREATE TABLE p_mapping (
state char(2) CHARACTER SET ascii NOT NULL,
city varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL,
PRIMARY KEY(state, city),
INDEX(city, state)
) ENGINE=InnoDB;
INSERT INTO p_mapping (state, city)
SELECT state, city FROM us;
DROP TABLE IF EXISTS p_cities; -- city ~= actor
CREATE TABLE p_cities (
depth TINYINT UNSIGNED NOT NULL DEFAULT 0,
from_state char(2) CHARACTER SET ascii NOT NULL DEFAULT '',
city VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL,
PRIMARY KEY(city)
) ENGINE=InnoDB COMMENT 'SO 55717636';
INSERT INTO p_cities (city)
SELECT DISTINCT city FROM p_mapping;
DROP TABLE IF EXISTS p_states; -- state ~= movie
CREATE TABLE p_states (
depth TINYINT UNSIGNED NOT NULL DEFAULT 0,
from_city VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
state char(2) CHARACTER SET ascii NOT NULL,
PRIMARY KEY(state)
) ENGINE=InnoDB COMMENT 'SO 55717636';
INSERT INTO p_states (state)
SELECT DISTINCT state FROM p_mapping;
-- 我选择了连接奥马哈(仅内布拉斯加州)和伯明翰(多个州)的目标。首先进行一些初始化:
SET @city := 'Omaha'; -- starting here
UPDATE p_cities
SET depth = 1
WHERE city = @city;
UPDATE p_states AS s
JOIN p_mapping AS m USING(state)
SET s.from_city = @city,
s.depth = 1
WHERE m.city = @city;
SET @depth := 1;
-- 然后重复以下10次或者直到rows_affected降为0。迭代3次后停止。
UPDATE p_cities AS c
JOIN p_mapping AS m USING(city)
JOIN p_states AS s USING(state)
SET c.from_state = m.state,
c.depth = s.depth + 1
WHERE s.depth = @depth
AND c.depth = 0;
SET @depth := @depth + 1;
UPDATE p_states AS s
JOIN p_mapping AS m USING(state)
JOIN p_cities AS c USING(city)
SET s.from_city = m.city,
s.depth = c.depth
WHERE c.depth = @depth
AND s.depth = 0;
-- 结束循环(算法结束)
-- 正确路径:奥马哈 -> 东北 -> 哥伦布 -> 俄亥俄州 -> 雅典 -> 阿拉巴马州 -> 伯明翰
-- 注意这是如何列出答案的(但是垂直的):
SELECT * FROM p_cities
WHERE city in ('Omaha', 'Columbus', 'Athens', 'Birmingham')
ORDER BY depth;
+-------+------------+------------+
| depth | from_state | city |
+-------+------------+------------+
| 1 | | Omaha |
| 2 | NE | Columbus |
| 3 | OH | Athens |
| 4 | AL | Birmingham |
+-------+------------+------------+
4 rows in set (0.00 sec)
-- 'Proof' 链接适用于以下答案:
SELECT * FROM p_mapping
WHERE city IN ('Omaha', 'Columbus', 'Athens', 'Birmingham')
AND state IN ('NE', 'OH', 'TN', 'AL');
+-------+------------+
| state | city |
+-------+------------+
| AL | Athens |
| OH | Athens |
| TN | Athens |
| AL | Birmingham |
| NE | Columbus |
| OH | Columbus |
| NE | Omaha |
+-------+------------+
7 rows in set (0.00 sec)
--(其他table)
SELECT * FROM p_states
WHERE from_city IN ('Omaha', 'Columbus', 'Athens', 'Birmingham')
OR state IN ('NE', 'OH', 'TN', 'AL')
ORDER BY depth;
+-------+-----------+-------+
| depth | from_city | state |
+-------+-----------+-------+
| 1 | Omaha | NE |
| 2 | Columbus | GA |
| 2 | Columbus | IN |
| 2 | Columbus | MS |
| 2 | Columbus | OH |
| 3 | Athens | AL |
| 3 | Athens | TN |
+-------+-----------+-------+
7 rows in set (0.00 sec)
我在SQL中有以下三个表:
select * from movie limit 2;
id | title | year | content_rating | duration | lang | country | gross | budget | director_id
------+----------------------------+------+----------------+----------+------------+----------------------+----------+----------+-------------
407 | 102 Dalmatians | 2000 | G | 100 | English | USA | 66941559 | 85000000 | 2174
3699 | 10 Cloverfield Lane | 2016 | PG-13 | 104 | English | USA | 71897215 | 15000000 | 1327
(2 rows)
select * from actor limit 3;
id | name | facebook_likes
------+----------------------+----------------
408 | Christian Bale | 23000
1430 | Donna Murphy | 553
66 | Robert Downey Jr. | 21000
(3 rows)
select * from acting limit 3;
movie_id | actor_id
----------+----------
407 | 2024
3699 | 1841
3016 | 11
(3 rows)
给定两个演员 a1
和 a2
,我想找到 最短 路径 a1
和 a2
。
例如,假设 a1 = 'Tom Cruise'
和 a2 = 'Robert Downey Jr'
。
输出应该是
Tom Cruise was in Days of Thunder with Robert Duvall
-> Robert Duvall was in Lucky You with Robert Downey Jr.
在这种情况下,Tom Cruise
与 Robert Downey Jr
相距 2 度,Robert Durvall
连接它们。我最多想输出 10 度 ,然后忽略任何连接。
我尝试使用递归 CTE 实施解决方案
尝试查询:
with recursive cte as (
select actor.name, movie.title, 1 as level from movie
left join acting on acting.movie_id = movie.id
left join actor on actor.id = acting.actor_id
where actor.name = 'Tom Cruise'
union
select actor.name, movie.title, level+1 from movie
left join acting on acting.movie_id = movie.id
left join actor on actor.id = acting.actor_id
inner join cte on cte.name = actor.name
where cte.name = actor.name and cte.level < 10
)
select * from cte
我不确定查询中的第二个 select 会是什么 return 但这里有一种获取参与者之间分离度的方法:
假设我们有 table 个演员 ID,Origin。为了获得与我们 table 中的其中一位演员在同一部电影中扮演过的所有演员,我们需要从 Origin 开始,加入 Acting 然后加入 Movie 以获得我们起源的所有电影演员已经上场,然后再次加入 Acting 和 Actor table 以获得我们想要的。注意 Acting table 出现了两次。如果我们将其应用于递归 CTE 和您的问题,并注意到 Origin table 在您的示例中将是 Cte,我们将得到以下结果:
WITH RECURSIVE cte(id, distance) AS (
SELECT actor.id, 0
FROM actor
WHERE actor.name = 'Tom Cruise'
UNION
SELECT DISTINCT actor.id, cte.distance + 1
FROM cte
JOIN acting AS acting1 ON (cte.id = acting1.actor_id)
JOIN movie ON (acting1.movie_id = movie.id)
JOIN acting AS acting2 ON (movie.id = acting2.movie_id)
JOIN actor ON (acting2.actor_id = actor.id)
WHERE cte.id <> actor.id AND cte.distance + 1 <= 10
)
在此之后,cte table 将包含类型为 (id, dist) 的元组,这意味着存在从汤姆克鲁斯到具有此 id 且距离为 dist 的演员的路径。
DISTINCT 是为了提高效率。我们的 Cte table 中会有很多错误对(第二个值大于真实距离),尤其是在 actor 图密集的情况下,但正确的元组 将 在 Cte table。正确的元组是指一个元组(演员,距离),这样的距离是起始演员(例如汤姆克鲁斯)和这个演员之间的最短路径。
编辑: 不好意思,UNION 已经这样做了,所以重复不需要 DISTINCT。
为了得到那个距离,我们添加一个 select 和一个 group by 子句:
WITH RECURSIVE cte(id, distance) AS (
SELECT actor.id, 0
FROM actor
WHERE actor.name = 'Tom Cruise'
UNION
SELECT actor.id, cte.distance + 1
FROM cte
JOIN acting AS acting1 ON (cte.id = acting1.actor_id)
JOIN movie ON (acting1.movie_id = movie.id)
JOIN acting AS acting2 ON (movie.id = acting2.movie_id)
JOIN actor ON (acting2.actor_id = actor.id)
WHERE cte.id <> actor.id AND cte.distance + 1 <= 10
)
SELECT id, MIN(distance) AS distance
FROM cte
GROUP BY id
ORDER BY 2 ASC;
如果您想查看给定的第二个演员的结果,比如小罗伯特唐尼,那么这将为您提供有关分离度的答案:
WITH RECURSIVE cte(id, distance) AS (
SELECT actor.id, 0
FROM actor
WHERE actor.name = 'Tom Cruise'
UNION
SELECT actor.id, cte.distance + 1
FROM cte
JOIN acting AS acting1 ON (cte.id = acting1.actor_id)
JOIN movie ON (acting1.movie_id = movie.id)
JOIN acting AS acting2 ON (movie.id = acting2.movie_id)
JOIN actor ON (acting2.actor_id = actor.id)
WHERE cte.id <> actor.id AND cte.distance + 1 <= 10
), distance_table (id, distance) AS (
SELECT id, MIN(distance) AS distance
FROM cte
GROUP BY id
)
SELECT 'Tom Cruise and ' || actor.name || ' are separated by ' ||
COALESCE(TO_CHAR(distance_table.distance, '999999'), 'more than 10') || ' degrees of separation'
FROM actor
LEFT JOIN distance_table ON (actor.id = distance_table.id)
WHERE actor.name = 'Robert Downey Jr';
尽管我认为您一般不会想直接从数据库中计算此类信息,但如果您想要一条消息告诉演员之间的路径,就像您提供的那样(汤姆克鲁斯是在 Days of Thunder with Robert Duvall -> Robert Duvall 在 Lucky You 和 Robert Downey Jr.) 中,那么这样的事情可以 return 那:
WITH RECURSIVE cte(id, name, distance, message) AS (
SELECT actor.id, actor.name, 0, ''
FROM actor
WHERE actor.name = 'Tom Cruise'
UNION
SELECT actor.id, actor.name, cte.distance + 1,
cte.message || '> ' || cte.name || ' was in ' ||
movie.title || ' with ' || actor.name || ' '
FROM cte
JOIN acting AS acting1 ON (cte.id = acting1.actor_id)
JOIN movie ON (acting1.movie_id = movie.id)
JOIN acting AS acting2 ON (movie.id = acting2.movie_id)
JOIN actor ON (acting2.actor_id = actor.id)
WHERE cte.id <> actor.id AND cte.distance + 1 <= 10
), distance_table (id, distance) AS (
SELECT id, MIN(distance) AS distance
FROM cte
GROUP BY id
)
SELECT id, name, message, distance
FROM cte
WHERE (id, distance) IN (SELECT * FROM distance_table)
ORDER BY distance;
这是一个尝试(没有 CTE)。我碰巧有一份包含 4175 个美国城邦对的列表。 (想想 state==movie 和 city==actor。)
这是来自 table us
的设置:
SET NAMES utf8 COLLATE utf8_unicode_ci;
DROP TABLE IF EXISTS p_mapping; -- state-city pairs (movie-actor)
CREATE TABLE p_mapping (
state char(2) CHARACTER SET ascii NOT NULL,
city varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL,
PRIMARY KEY(state, city),
INDEX(city, state)
) ENGINE=InnoDB;
INSERT INTO p_mapping (state, city)
SELECT state, city FROM us;
DROP TABLE IF EXISTS p_cities; -- city ~= actor
CREATE TABLE p_cities (
depth TINYINT UNSIGNED NOT NULL DEFAULT 0,
from_state char(2) CHARACTER SET ascii NOT NULL DEFAULT '',
city VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL,
PRIMARY KEY(city)
) ENGINE=InnoDB COMMENT 'SO 55717636';
INSERT INTO p_cities (city)
SELECT DISTINCT city FROM p_mapping;
DROP TABLE IF EXISTS p_states; -- state ~= movie
CREATE TABLE p_states (
depth TINYINT UNSIGNED NOT NULL DEFAULT 0,
from_city VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
state char(2) CHARACTER SET ascii NOT NULL,
PRIMARY KEY(state)
) ENGINE=InnoDB COMMENT 'SO 55717636';
INSERT INTO p_states (state)
SELECT DISTINCT state FROM p_mapping;
-- 我选择了连接奥马哈(仅内布拉斯加州)和伯明翰(多个州)的目标。首先进行一些初始化:
SET @city := 'Omaha'; -- starting here
UPDATE p_cities
SET depth = 1
WHERE city = @city;
UPDATE p_states AS s
JOIN p_mapping AS m USING(state)
SET s.from_city = @city,
s.depth = 1
WHERE m.city = @city;
SET @depth := 1;
-- 然后重复以下10次或者直到rows_affected降为0。迭代3次后停止。
UPDATE p_cities AS c
JOIN p_mapping AS m USING(city)
JOIN p_states AS s USING(state)
SET c.from_state = m.state,
c.depth = s.depth + 1
WHERE s.depth = @depth
AND c.depth = 0;
SET @depth := @depth + 1;
UPDATE p_states AS s
JOIN p_mapping AS m USING(state)
JOIN p_cities AS c USING(city)
SET s.from_city = m.city,
s.depth = c.depth
WHERE c.depth = @depth
AND s.depth = 0;
-- 结束循环(算法结束)
-- 正确路径:奥马哈 -> 东北 -> 哥伦布 -> 俄亥俄州 -> 雅典 -> 阿拉巴马州 -> 伯明翰 -- 注意这是如何列出答案的(但是垂直的):
SELECT * FROM p_cities
WHERE city in ('Omaha', 'Columbus', 'Athens', 'Birmingham')
ORDER BY depth;
+-------+------------+------------+
| depth | from_state | city |
+-------+------------+------------+
| 1 | | Omaha |
| 2 | NE | Columbus |
| 3 | OH | Athens |
| 4 | AL | Birmingham |
+-------+------------+------------+
4 rows in set (0.00 sec)
-- 'Proof' 链接适用于以下答案:
SELECT * FROM p_mapping
WHERE city IN ('Omaha', 'Columbus', 'Athens', 'Birmingham')
AND state IN ('NE', 'OH', 'TN', 'AL');
+-------+------------+
| state | city |
+-------+------------+
| AL | Athens |
| OH | Athens |
| TN | Athens |
| AL | Birmingham |
| NE | Columbus |
| OH | Columbus |
| NE | Omaha |
+-------+------------+
7 rows in set (0.00 sec)
--(其他table)
SELECT * FROM p_states
WHERE from_city IN ('Omaha', 'Columbus', 'Athens', 'Birmingham')
OR state IN ('NE', 'OH', 'TN', 'AL')
ORDER BY depth;
+-------+-----------+-------+
| depth | from_city | state |
+-------+-----------+-------+
| 1 | Omaha | NE |
| 2 | Columbus | GA |
| 2 | Columbus | IN |
| 2 | Columbus | MS |
| 2 | Columbus | OH |
| 3 | Athens | AL |
| 3 | Athens | TN |
+-------+-----------+-------+
7 rows in set (0.00 sec)