使用 SciPy 的最小化来查找图中的最短路径
Using SciPy's minimize to find the shortest path in a graph
我试图在下图中找到从 G 到 C 的最短路径,并且我编写了以下代码来完成它。
首先我给出我认为应该使用的方程式和约束条件:
我们最大化 dc 服从:
db-da <= 8
df-da <= 10
直流数据库 <= 4
dd-dc <= 3
de-dd <= 25
df-dd <= 18
dd-de <= 9
dg-de <= 7
da-df <= 5
db-df <= 7
dc-df <= 3
de-df <= 2
dd-dg <= 2
dh-dg <= 3
da-dh <= 4
db-dh <= 9
import numpy as np
import scipy as scp
from scipy.optimize import minimize
a,b,c,d,e,f,g,h = 0,1,2,3,4,5,6,7
def objective(x, sign = -1.0):
return sign*x[c]
def c1(x, sign = -1.0):
return sign*(x[b]-x[a]-8)
def c2(x, sign = -1.0):
return sign*(x[f]-x[a]-10)
def c3(x, sign = -1.0):
return sign*(x[c]-x[b]-4)
def c4(x, sign = -1.0):
return sign*(x[d]-x[c]-3)
def c5(x, sign = -1.0):
return sign*(x[e]-x[d]-25)
def c6(x, sign = -1.0):
return sign*(x[f]-x[d]-18)
def c7(x, sign = -1.0):
return sign*(x[d]-x[e]-9)
def c8(x, sign = -1.0):
return sign*(x[g]-x[e]-7)
def c9(x, sign = -1.0):
return sign*(x[a]-x[f]-5)
def c10(x, sign = -1.0):
return sign*(x[b]-x[f]-7)
def c11(x, sign = -1.0):
return sign*(x[c]-x[f]-3)
def c12(x, sign = -1.0):
return sign*(x[e]-x[f]-2)
def c13(x, sign = -1.0):
return sign*(x[d]-x[g]-2)
def c14(x, sign = -1.0):
return sign*(x[h]-x[g]-3)
def c15(x, sign = -1.0):
return sign*(x[a]-x[h]-4)
def c16(x, sign = -1.0):
return sign*(x[b]-x[h]-9)
def c17(x, sign = -1.0):
return x[g]
cs = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17]
x0 = [0 for i in range(8)]
b = (0,None)
bs = tuple([b for i in range(8)])
cons = []
for i in range(16):
cons.append({'type': 'ineq', 'fun':cs[i]})
cons.append({'type': 'eq', 'fun':c17})
sol = minimize(objective, x0, method = 'SLSQP', bounds=bs, constraints=cons)
for val in sol['x']:
print(round(val))
可以仅使用代数来求解每个变量,但使用 LP 来求解应该更有效。
我相信仅通过图表的手动跟踪,最佳路径是 G-H-B-C,总成本为 16。但是,上面的代码表明最佳路径是 G-H-A-F-C,其成本在他们不这样做之前是有意义的:3-4-1-3。它说最佳路径长度是 11。它看起来非常接近有效答案,除了它认为你可以以 1 的成本从 A 到 F。
[编辑:我刚刚注意到我错过了从 B 到 E 的边缘,但这似乎无关紧要,事实上,当我将它添加到算法中时,答案并没有改变。]
这个(工作)代码是:
- 有点难看(没有仔细分析很好用的图表 data-structures 这个任务)
- 使用 scipy 的 linprog(method='simplex'),我不再信任它(请参阅 github 处的问题)
- 遵循 wikipedia
中描述的 LP
- 不适用于 real-world 用途
- 低效data-structures
- 低效和dense-only求解器
请务必阅读我上面的评论!
代码
import numpy as np
from scipy.optimize import linprog
""" DATA"""
edges = [('A', 'B', 8),
('A', 'F', 10),
('B', 'C', 4),
('B', 'E', 10),
('C', 'D', 3),
('D', 'E', 25),
('D', 'F', 18),
('E', 'D', 9),
('E', 'G', 7),
('F', 'A', 5),
('F', 'B', 7),
('F', 'C', 3),
('F', 'E', 2),
('G', 'D', 2),
('G', 'H', 3),
('H', 'A', 4),
('H', 'B', 9)]
s, t = 'G', 'C'
""" Preprocessing """
nodes = sorted(set([i[0] for i in edges])) # assumption: each node has an outedge
n_nodes = len(nodes)
n_edges = len(edges)
edge_matrix = np.zeros((len(nodes), len(nodes)), dtype=int)
for edge in edges:
i, j, v = edge
i_ind = nodes.index(i) # slow
j_ind = nodes.index(j) # """
edge_matrix[i_ind, j_ind] = v
nnz_edges = np.nonzero(edge_matrix)
edge_dict = {}
counter = 0
for e in range(n_edges):
a, b = nnz_edges[0][e], nnz_edges[1][e]
edge_dict[(a,b)] = counter
counter += 1
s_ind = nodes.index(s)
t_ind = nodes.index(t)
""" LP """
bounds = [(0, 1) for i in range(n_edges)]
c = [i[2] for i in edges]
A_rows = []
b_rows = []
for source in range(n_nodes):
out_inds = np.flatnonzero(edge_matrix[source, :])
in_inds = np.flatnonzero(edge_matrix[:, source])
rhs = 0
if source == s_ind:
rhs = 1
elif source == t_ind:
rhs = -1
n_out = len(out_inds)
n_in = len(in_inds)
out_edges = [edge_dict[a, b] for a, b in np.vstack((np.full(n_out, source), out_inds)).T]
in_edges = [edge_dict[a, b] for a, b in np.vstack((in_inds, np.full(n_in, source))).T]
A_row = np.zeros(n_edges)
A_row[out_edges] = 1
A_row[in_edges] = -1
A_rows.append(A_row)
b_rows.append(rhs)
A = np.vstack(A_rows)
b = np.array(b_rows)
res = linprog(c, A_eq=A, b_eq=b, bounds=bounds)
print(res)
输出:
fun: 16.0
message: 'Optimization terminated successfully.'
nit: 11
slack: array([1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0.])
status: 0
success: True
x: array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.])
LP 看起来像:
bounds
[(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]
objective / c
[8, 10, 4, 10, 3, 25, 18, 9, 7, 5, 7, 3, 2, 2, 3, 4, 9]
constraint-matrix A_eq / A
[[ 1. 1. 0. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 0. -1. 0.]
[-1. 0. 1. 1. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 0. -1.]
[ 0. 0. -1. 0. 1. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. -1. 1. 1. -1. 0. 0. 0. 0. 0. -1. 0. 0. 0.]
[ 0. 0. 0. -1. 0. -1. 0. 1. 1. 0. 0. 0. -1. 0. 0. 0. 0.]
[ 0. -1. 0. 0. 0. 0. -1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 1. 1. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. -1. 1. 1.]]
rhs / b
[ 0 0 -1 0 0 0 1 0]
这表明确实应该使用利用稀疏性的求解器!
我试图在下图中找到从 G 到 C 的最短路径,并且我编写了以下代码来完成它。
首先我给出我认为应该使用的方程式和约束条件:
我们最大化 dc 服从:
db-da <= 8
df-da <= 10
直流数据库 <= 4
dd-dc <= 3
de-dd <= 25
df-dd <= 18
dd-de <= 9
dg-de <= 7
da-df <= 5
db-df <= 7
dc-df <= 3
de-df <= 2
dd-dg <= 2
dh-dg <= 3
da-dh <= 4
db-dh <= 9
import numpy as np
import scipy as scp
from scipy.optimize import minimize
a,b,c,d,e,f,g,h = 0,1,2,3,4,5,6,7
def objective(x, sign = -1.0):
return sign*x[c]
def c1(x, sign = -1.0):
return sign*(x[b]-x[a]-8)
def c2(x, sign = -1.0):
return sign*(x[f]-x[a]-10)
def c3(x, sign = -1.0):
return sign*(x[c]-x[b]-4)
def c4(x, sign = -1.0):
return sign*(x[d]-x[c]-3)
def c5(x, sign = -1.0):
return sign*(x[e]-x[d]-25)
def c6(x, sign = -1.0):
return sign*(x[f]-x[d]-18)
def c7(x, sign = -1.0):
return sign*(x[d]-x[e]-9)
def c8(x, sign = -1.0):
return sign*(x[g]-x[e]-7)
def c9(x, sign = -1.0):
return sign*(x[a]-x[f]-5)
def c10(x, sign = -1.0):
return sign*(x[b]-x[f]-7)
def c11(x, sign = -1.0):
return sign*(x[c]-x[f]-3)
def c12(x, sign = -1.0):
return sign*(x[e]-x[f]-2)
def c13(x, sign = -1.0):
return sign*(x[d]-x[g]-2)
def c14(x, sign = -1.0):
return sign*(x[h]-x[g]-3)
def c15(x, sign = -1.0):
return sign*(x[a]-x[h]-4)
def c16(x, sign = -1.0):
return sign*(x[b]-x[h]-9)
def c17(x, sign = -1.0):
return x[g]
cs = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17]
x0 = [0 for i in range(8)]
b = (0,None)
bs = tuple([b for i in range(8)])
cons = []
for i in range(16):
cons.append({'type': 'ineq', 'fun':cs[i]})
cons.append({'type': 'eq', 'fun':c17})
sol = minimize(objective, x0, method = 'SLSQP', bounds=bs, constraints=cons)
for val in sol['x']:
print(round(val))
可以仅使用代数来求解每个变量,但使用 LP 来求解应该更有效。
我相信仅通过图表的手动跟踪,最佳路径是 G-H-B-C,总成本为 16。但是,上面的代码表明最佳路径是 G-H-A-F-C,其成本在他们不这样做之前是有意义的:3-4-1-3。它说最佳路径长度是 11。它看起来非常接近有效答案,除了它认为你可以以 1 的成本从 A 到 F。
[编辑:我刚刚注意到我错过了从 B 到 E 的边缘,但这似乎无关紧要,事实上,当我将它添加到算法中时,答案并没有改变。]
这个(工作)代码是:
- 有点难看(没有仔细分析很好用的图表 data-structures 这个任务)
- 使用 scipy 的 linprog(method='simplex'),我不再信任它(请参阅 github 处的问题)
- 遵循 wikipedia 中描述的 LP
- 不适用于 real-world 用途
- 低效data-structures
- 低效和dense-only求解器
请务必阅读我上面的评论!
代码
import numpy as np
from scipy.optimize import linprog
""" DATA"""
edges = [('A', 'B', 8),
('A', 'F', 10),
('B', 'C', 4),
('B', 'E', 10),
('C', 'D', 3),
('D', 'E', 25),
('D', 'F', 18),
('E', 'D', 9),
('E', 'G', 7),
('F', 'A', 5),
('F', 'B', 7),
('F', 'C', 3),
('F', 'E', 2),
('G', 'D', 2),
('G', 'H', 3),
('H', 'A', 4),
('H', 'B', 9)]
s, t = 'G', 'C'
""" Preprocessing """
nodes = sorted(set([i[0] for i in edges])) # assumption: each node has an outedge
n_nodes = len(nodes)
n_edges = len(edges)
edge_matrix = np.zeros((len(nodes), len(nodes)), dtype=int)
for edge in edges:
i, j, v = edge
i_ind = nodes.index(i) # slow
j_ind = nodes.index(j) # """
edge_matrix[i_ind, j_ind] = v
nnz_edges = np.nonzero(edge_matrix)
edge_dict = {}
counter = 0
for e in range(n_edges):
a, b = nnz_edges[0][e], nnz_edges[1][e]
edge_dict[(a,b)] = counter
counter += 1
s_ind = nodes.index(s)
t_ind = nodes.index(t)
""" LP """
bounds = [(0, 1) for i in range(n_edges)]
c = [i[2] for i in edges]
A_rows = []
b_rows = []
for source in range(n_nodes):
out_inds = np.flatnonzero(edge_matrix[source, :])
in_inds = np.flatnonzero(edge_matrix[:, source])
rhs = 0
if source == s_ind:
rhs = 1
elif source == t_ind:
rhs = -1
n_out = len(out_inds)
n_in = len(in_inds)
out_edges = [edge_dict[a, b] for a, b in np.vstack((np.full(n_out, source), out_inds)).T]
in_edges = [edge_dict[a, b] for a, b in np.vstack((in_inds, np.full(n_in, source))).T]
A_row = np.zeros(n_edges)
A_row[out_edges] = 1
A_row[in_edges] = -1
A_rows.append(A_row)
b_rows.append(rhs)
A = np.vstack(A_rows)
b = np.array(b_rows)
res = linprog(c, A_eq=A, b_eq=b, bounds=bounds)
print(res)
输出:
fun: 16.0
message: 'Optimization terminated successfully.'
nit: 11
slack: array([1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0.])
status: 0
success: True
x: array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.])
LP 看起来像:
bounds
[(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]
objective / c
[8, 10, 4, 10, 3, 25, 18, 9, 7, 5, 7, 3, 2, 2, 3, 4, 9]
constraint-matrix A_eq / A
[[ 1. 1. 0. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 0. -1. 0.]
[-1. 0. 1. 1. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 0. -1.]
[ 0. 0. -1. 0. 1. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. -1. 1. 1. -1. 0. 0. 0. 0. 0. -1. 0. 0. 0.]
[ 0. 0. 0. -1. 0. -1. 0. 1. 1. 0. 0. 0. -1. 0. 0. 0. 0.]
[ 0. -1. 0. 0. 0. 0. -1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 1. 1. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. -1. 1. 1.]]
rhs / b
[ 0 0 -1 0 0 0 1 0]
这表明确实应该使用利用稀疏性的求解器!