Python 线程处理 - 内存不足

Question

我目前正在用 Python 解决一个问题：确定安排送货时应采用的最佳路线。我的代码的大致思路是：先读入所有建筑物（输入中“:”之前的值），然后计算途经这些建筑物的所有可能路线。接着，我为生成的每个组合单独启动一个线程来进行计算，并返回走完该路线后回到 'home' 建筑物（在所有情况下都是建筑物 'abc'）的总时间。

我下面的代码在较小的数据子集（总共 4 座建筑物）上运行良好，但是当我把代码扩展到 13 座建筑物（所需数量）时，执行期间就会出现 MemoryError。

我对如何解决这个问题有些困惑，我以前从未遇到过以指数级增长的问题。我的解决方案必须包括线程。任何 suggestions/tips 将不胜感激。

Input.txt（小子集）：

abc : 0 5 7 3
def : 4 0 3 6
ghi : 6 4 0 4
jkl : 4 5 6 0

Input.txt（完整数据）：

abc : 0 5 7 3 2 4 6 2 1 5 8 4 5
def : 4 0 3 6 7 2 3 4 5 6 7 8 6
ghi : 6 4 0 4 9 9 9 9 9 9 9 9 7
jkl : 4 5 6 0 2 3 7 8 6 9 2 8 3
mno : 1 2 3 4 0 9 8 7 6 5 3 2 2
pqr : 9 8 3 4 1 0 9 8 3 5 7 9 2
stu : 1 8 9 4 2 1 0 9 8 7 2 1 1
vwx : 3 2 1 9 4 1 5 0 9 8 2 5 8
yza : 1 9 8 2 3 7 4 6 0 1 4 2 6
bcd : 8 9 1 4 6 2 4 2 1 0 9 3 4
efg : 7 7 7 7 8 9 1 2 3 9 0 4 3
hij : 6 1 2 4 9 0 2 1 3 9 1 0 8
klm : 1 6 3 8 3 5 9 4 7 2 1 5 0

当前代码：

import time
import os
import threading
import sys
from itertools import permutations
from functools import reduce


# Input table of travel times; outputFile is declared here but not used in
# the code visible in this script.
inputFile = 'Input.txt'
outputFile = 'output2.txt'

# Accumulators: building names, their raw timing strings, and the
# route -> total time mapping written by the worker threads.
buildings = []
timings = []
results = {}

# Open the table and prime the read loop with its first row.
f = open(inputFile, 'r')
line = f.readline()

def run_me(TimeMatrix,combination,results,buildingDict):
    """Compute the total time of one route and store it in ``results``.

    Args:
        TimeMatrix: travel-time table passed through to ``GenerateTiming``.
        combination: sequence of building names describing the route.
        results: shared dict; receives ``' '.join(route) -> total time``.
        buildingDict: name -> index mapping passed through to
            ``GenerateTiming``.
    """
    # Do the computation before taking the lock so the lock only guards the
    # write to the shared dict instead of serialising the whole calculation.
    key = ' '.join(map(str, combination))
    total = GenerateTiming(TimeMatrix, combination, buildingDict)
    # ``with`` releases the lock even if the assignment raises.
    with my_lock:
        results[key] = total

def GenerateTiming(TimeMatrix,combination,buildingDict):
    """Return the summed travel time over consecutive stops of a route.

    Args:
        TimeMatrix: list of rows; for a leg from building A to building B
            the value read is ``TimeMatrix[idx(A) - 1][idx(B)]``.
        combination: sequence of building names in visiting order.
        buildingDict: maps each building name to the index used above.

    Returns:
        The sum of the leg times; 0 for a route with fewer than two stops.
    """
    # Pair every stop with the stop that follows it.
    legs = zip(combination, combination[1:])
    return sum(
        TimeMatrix[buildingDict[origin] - 1][buildingDict[destination]]
        for origin, destination in legs
    )


# Parse every "name : t1 t2 ..." row into a building name and its raw
# timing string.
while line:
    # Skip blank rows (for example a trailing empty line at the end of the
    # file); they have no ":" separator and would raise IndexError below.
    if line.strip():
        parts=line.split(":")
        b=parts[0].strip()
        t=parts[1].strip()
        buildings.append(b)
        timings.append(t)
        home=buildings[0]
    line=f.readline()

# The whole table has been consumed; release the file handle.
f.close()



# Every candidate round trip: leave the first building, visit all the others
# in each possible order, and come back to the first building.
# NOTE: this list holds (len(buildings) - 1)! routes at once.
first, *rest = buildings
combinations = [[first, *p, first] for p in permutations(rest)]

# The first route lists the buildings in input order (plus the closing
# return to the start), so it doubles as the source for the name -> index map.
bldLKP = combinations[0]
# 1-based position of every building; the trailing repeat of the start
# building is dropped so it keeps index 1.
buildingDict = {name: position for position, name in enumerate(bldLKP[:-1], 1)}


# Each row is [row_number, t1, t2, ...]; the leading row number shifts the
# timing columns so they can be addressed with the 1-based building index.
TimeMatrix=[[i] + [int(n) for n in s.split()] for i, s in enumerate(timings, 1)]

#Threading Section
# A single lock is shared by all workers to guard writes to ``results``.
my_lock = threading.Lock()

# One thread object is created for every route in ``combinations``.
my_threads = [
    threading.Thread(target=run_me, args=(TimeMatrix, route, results, buildingDict))
    for route in combinations
]

# Launch every worker first ...
for worker in my_threads:
    worker.start()

# ... then wait until all of them have finished.
for worker in my_threads:
    worker.join()


# Smallest total time over all evaluated routes.
lowest = min(results.values())
# Every route (already formatted as a space-separated string) that achieves it.
final = [route for route, total in results.items() if total == lowest]

print(' '.join(map(str, final)), lowest)

编辑：我应该提到我认为问题出在以下代码中，我在其中识别建筑物的所有可能组合。但是，我不确定如何以其他方式做到这一点，因为需要检查每条路径的最快路线。

# Builds one list per ordering of the non-start buildings, each wrapped with
# the start building at both ends; all of them are kept in memory at once.
combinations=[]
first, *rest = buildings
for p in permutations(rest):
    combinations.append([first,*p,first])

Answer 1

您的代码先创建排列，然后为每条路线运行一个线程来计算总和（时间）。您的代码运行的线程数量是：

小子集（4 栋建筑）

您为其余建筑物（不包括第一个建筑物）创建排列，因此数量将为 (4-1)！ = 3 * 2 * 1 = 6

完整数据（13 栋建筑）：(13-1)! = 479001600（需要创建这么多个线程）。

我建议在这种情况下不要使用线程。

我编写了一个简单的递归函数来实现您的需求。与枚举全部排列相比，它在性能上有很大的改进：如果当前累计时间已经大于已知的最短时间，它就不会继续向更深层搜索。请看我的实现：

import threading

# Nested lookup filled below: time_matrix[origin][destination] -> int time.
time_matrix = {}
# Building names in the order their rows appear in the input file.
buildings = []

with open('input.txt', 'r') as f:
    lines = []
    # Iterate the file directly instead of materialising it with readlines().
    for row in f:
        # Ignore blank rows (e.g. a trailing empty line); they have no ':'
        # and would break the two-value unpacking below.
        if not row.strip():
            continue
        building, line = row.split(':')
        building = building.strip()
        buildings.append(building)
        lines.append(line.strip())
        time_matrix[building] = {}

# Second pass: all building names are known now, so the n-th number of each
# row can be attached to the n-th building.
for building, line in zip(buildings, lines):
    # split() without an argument tolerates runs of spaces or tabs, where
    # split(' ') would produce empty tokens that int() rejects.
    for index, time_to_reach in enumerate(line.split()):
        to_building = buildings[index]
        time_matrix[building][to_building] = int(time_to_reach)

# The first building is the start/end of every route; the rest must be visited.
first, *rest = buildings


# Shared list of (min_time, min_paths) tuples, one appended per worker thread.
results = []
class MyThread(threading.Thread):
    """Thread that searches one branch of the route tree for the quickest round trip.

    Starting from ``current_building`` with ``current_time`` already spent, it
    explores every ordering of ``to_visit_buildings`` and abandons a partial
    route as soon as it is already slower than the best complete one found.

    Args:
        time_matrix: nested mapping ``time_matrix[origin][destination]``.
        current_building: building the search starts from.
        to_visit_buildings: buildings that still have to be visited.
        current_path: buildings visited so far (start building excluded).
        current_time: time accumulated along ``current_path``.
        home: start/end building of every route. When left as ``None`` the
            module-level name ``first`` is used, as before.
    """

    def __init__(self, time_matrix, current_building, to_visit_buildings, current_path, current_time, home=None):
        super().__init__()
        self.time_matrix = time_matrix
        self.current_building = current_building
        self.to_visit_buildings = to_visit_buildings
        self.current_path = current_path
        self.current_time = current_time
        self.home = home

    def run(self):
        """Run the search and append ``(min_time, min_paths)`` to ``results``."""
        min_time, min_paths = self.calculate(self.time_matrix, self.current_building, self.to_visit_buildings, self.current_path, self.current_time)
        # Compare with None explicitly: a total time of 0 is a valid result.
        if min_time is not None:
            results.append((min_time, min_paths))

    def calculate(self, time_matrix, current_building, to_visit_buildings, current_path, current_time, min_time=None, min_paths=None):
        """Return the best ``(time, paths)`` reachable from the given state.

        Args:
            time_matrix: nested mapping ``time_matrix[origin][destination]``.
            current_building: building the partial route currently ends at.
            to_visit_buildings: buildings not yet on the partial route.
            current_path: buildings on the partial route, in order.
            current_time: time accumulated along ``current_path``.
            min_time: best complete-route time known so far, or ``None``.
            min_paths: list holding the route that achieved ``min_time``.

        Returns:
            ``(None, None)`` when the partial route is pruned; otherwise the
            best known ``(time, [path])``, which is the incoming
            ``min_time``/``min_paths`` pair when nothing better was found.
            On equal times the route found first is kept.
        """
        home = first if self.home is None else self.home

        # Prune: this partial route is already slower than a complete one.
        if min_time is not None and min_time < current_time:
            return None, None

        if not to_visit_buildings:
            # Every building visited: close the loop by returning home.
            total_time = current_time + time_matrix[current_building][home]
            if min_time is None or total_time < min_time:
                return total_time, [[home, *current_path, home]]
            return min_time, min_paths

        for building in to_visit_buildings:
            new_to_visit_buildings = [b for b in to_visit_buildings if b != building]
            new_current_path = [*current_path, building]
            new_current_time = current_time + time_matrix[current_building][building]
            new_min_time, new_min_paths = self.calculate(time_matrix, building, new_to_visit_buildings, new_current_path, new_current_time, min_time, min_paths)
            # Explicit None checks so a zero-time route is not mistaken for
            # "no result" by a truthiness test.
            if new_min_time is not None and (min_time is None or new_min_time < min_time):
                min_time = new_min_time
                min_paths = new_min_paths

        return min_time, min_paths

# One worker per possible first stop after the start building; each explores
# only the routes that begin with that stop.
my_threads = []
for building in rest:
    to_visit = [b for b in rest if b != building]
    current_time = time_matrix[first][building]
    my_threads.append(MyThread(time_matrix, building, to_visit, [building], current_time))

for current_thread in my_threads:
    current_thread.start()

for current_thread in my_threads:
    current_thread.join()

# Entries of results are (time, paths) tuples, so unpack in that order;
# the previous (paths, time) order bound each name to the other's value.
min_time, min_paths = min(results, key=lambda r: r[0])

print(min_paths, min_time)

对于完整数据，它的输出是： ['abc', 'yza', 'bcd', 'ghi', 'jkl', 'efg', 'stu', 'hij', 'vwx', 'def', 'pqr', 'mno', 'klm', 'abc'] 20

Python 线程处理 - 内存不足

Python Threading - Out of Memory

python

optimization

python-multithreading