Python 线程处理 - 内存不足
Python Threading - Out of Memory
我目前正在 python 中解决一个问题,以确定在安排交货时采用的最佳路线。对我的代码的高层次理解是,我读入了所有建筑物(输入中“:”之前的值),然后计算通往这些建筑物的路线的所有可能性。然后我将计算分成一个线程,用于生成的每个组合和 return 到 return 返回 'home' 建筑物的总时间(在所有情况下建筑物 'abc' ).
我下面的代码在较小的数据子集(总共 4 座建筑物)上运行良好,但是当我将代码扩展到 13 座建筑物(所需数量)时。我在执行期间 运行 变成了 Memory Error
。
我对如何解决这个问题有些困惑,我以前从未遇到过以指数级增长的问题。我的解决方案必须包括线程。任何 suggestions/tips 将不胜感激。
Input.txt(小子集):
abc : 0 5 7 3
def : 4 0 3 6
ghi : 6 4 0 4
jkl : 4 5 6 0
Input.txt(完整数据):
abc : 0 5 7 3 2 4 6 2 1 5 8 4 5
def : 4 0 3 6 7 2 3 4 5 6 7 8 6
ghi : 6 4 0 4 9 9 9 9 9 9 9 9 7
jkl : 4 5 6 0 2 3 7 8 6 9 2 8 3
mno : 1 2 3 4 0 9 8 7 6 5 3 2 2
pqr : 9 8 3 4 1 0 9 8 3 5 7 9 2
stu : 1 8 9 4 2 1 0 9 8 7 2 1 1
vwx : 3 2 1 9 4 1 5 0 9 8 2 5 8
yza : 1 9 8 2 3 7 4 6 0 1 4 2 6
bcd : 8 9 1 4 6 2 4 2 1 0 9 3 4
efg : 7 7 7 7 8 9 1 2 3 9 0 4 3
hij : 6 1 2 4 9 0 2 1 3 9 1 0 8
klm : 1 6 3 8 3 5 9 4 7 2 1 5 0
当前代码:
import time
import os
import threading
import sys
from itertools import permutations
from functools import reduce
inputFile = 'Input.txt'
outputFile = 'output2.txt'
f=open(inputFile,'r')
line=f.readline()
buildings=[]
timings=[]
results={}
def run_me(TimeMatrix,combination,results,buildingDict):
my_lock.acquire()
results[' '.join(map(str, combination))] = GenerateTiming(TimeMatrix,combination,buildingDict)
my_lock.release()
def GenerateTiming(TimeMatrix,combination,buildingDict):
current=combination
mySum=[]
for i in range(len(current)-1):
currentBuilding=buildingDict[current[i]]
nextBuilding=buildingDict[current[i+1]]
mySum.append(TimeMatrix[currentBuilding-1][nextBuilding])
result=sum(mySum)
return(result)
while line:
b=line.split(":")[0]
t=line.split(":")[1]
b=b.strip()
t=t.strip()
buildings.append(b)
timings.append(t)
home=buildings[0]
line=f.readline()
combinations=[]
first, *rest = buildings
for p in permutations(rest):
combinations.append([first,*p,first])
bldLKP=combinations[0]
buildingDict={}
for i in range(1,len(bldLKP)):
buildingDict[bldLKP[i-1]] = i
i=i+1
TimeMatrix=[[i] + [int(n) for n in s.split()] for i, s in enumerate(timings, 1)]
#Threading Section
my_lock=threading.Lock()
my_threads=list()
for comb in combinations:
my_threads.append(threading.Thread(target=run_me,args=(TimeMatrix,comb,results,buildingDict)))
for current_thread in my_threads:
current_thread.start()
for current_thread in my_threads:
current_thread.join()
lowest=min(results.values())
final=[key for key in results if results[key]==lowest]
print(' '.join(map(str, final)),lowest)
编辑:我应该提到我认为问题出在以下代码中,我在其中识别建筑物的所有可能组合。但是,我不确定如何以其他方式做到这一点,因为需要检查每条路径的最快路线。
combinations=[]
first, *rest = buildings
for p in permutations(rest):
combinations.append([first,*p,first])
在您的代码中创建排列,然后 运行 线程计算每条路线的总和(时间)。您的代码线程数量 运行 是
小子集(4 栋建筑)
您为其余建筑物(不包括第一个建筑物)创建排列,因此数量将为 (4-1)! = 3 * 2 * 1 = 6
完整数据(13 栋建筑)
(13-1)! = 479001600(应该创建这样数量的线程。
我建议在这种情况下不要使用线程。
我编写了简单的递归函数来实现您的需要。我对排列有很大的性能改进。如果当前时间大于最小时间,它不会更深。请看看我的实现
import threading
time_matrix = {}
buildings = []
with open('input.txt', 'r') as f:
lines = []
for row in f.readlines():
building, line = row.split(':')
building = building.strip()
buildings.append(building)
lines.append(line.strip())
time_matrix[building] = {}
for building, line in zip(buildings, lines):
for index, time_to_reach in enumerate(line.split(' ')):
to_building = buildings[index]
time_matrix[building][to_building] = int(time_to_reach)
first, *rest = buildings
results = []
class MyThread(threading.Thread):
def __init__(self, time_matrix, current_building, to_visit_buildings, current_path, current_time):
super().__init__()
self.time_matrix = time_matrix
self.current_building = current_building
self.to_visit_buildings = to_visit_buildings
self.current_path = current_path
self.current_time = current_time
def run(self):
min_time, min_paths = self.calculate(self.time_matrix, self.current_building, self.to_visit_buildings, self.current_path, self.current_time)
if min_paths and min_time:
results.append((min_time, min_paths))
def calculate(self, time_matrix, current_building, to_visit_buildings, current_path, current_time, min_time=None, min_paths=None):
if min_paths and min_time < current_time:
return None, None
if not to_visit_buildings:
current_time += time_matrix[current_building][first]
if min_time is None or min_time > current_time:
path = [first, *current_path, first]
if min_time == current_time:
return current_time, min_paths + [path]
else:
return current_time, [path]
for building in to_visit_buildings:
new_to_visit_buildings = [b for b in to_visit_buildings if b != building]
new_current_path = [*current_path, building]
new_current_time = current_time + time_matrix[current_building][building]
new_min_time, new_min_paths = self.calculate(time_matrix, building, new_to_visit_buildings, new_current_path, new_current_time, min_time, min_paths)
if new_min_paths and new_min_time and (not min_time or new_min_time < min_time):
min_time = new_min_time
min_paths = new_min_paths
return min_time, min_paths
my_threads = []
for building in rest:
to_visit = [b for b in rest if b != building]
current_time = time_matrix[first][building]
my_threads.append(MyThread(time_matrix, building, to_visit, [building], current_time))
for current_thread in my_threads:
current_thread.start()
for current_thread in my_threads:
current_thread.join()
min_paths, min_time = min(results, key=lambda r: r[0])
print(min_paths, min_time)
对于它输出的完整数据:
['abc', 'yza', 'bcd', 'ghi', 'jkl', 'efg', 'stu', 'hij', 'vwx', 'def', 'pqr', 'mno', 'klm', 'abc'] 20
我目前正在 python 中解决一个问题,以确定在安排交货时采用的最佳路线。对我的代码的高层次理解是,我读入了所有建筑物(输入中“:”之前的值),然后计算通往这些建筑物的路线的所有可能性。然后我将计算分成一个线程,用于生成的每个组合和 return 到 return 返回 'home' 建筑物的总时间(在所有情况下建筑物 'abc' ).
我下面的代码在较小的数据子集(总共 4 座建筑物)上运行良好,但是当我将代码扩展到 13 座建筑物(所需数量)时。我在执行期间 运行 变成了 Memory Error
。
我对如何解决这个问题有些困惑,我以前从未遇到过以指数级增长的问题。我的解决方案必须包括线程。任何 suggestions/tips 将不胜感激。
Input.txt(小子集):
abc : 0 5 7 3
def : 4 0 3 6
ghi : 6 4 0 4
jkl : 4 5 6 0
Input.txt(完整数据):
abc : 0 5 7 3 2 4 6 2 1 5 8 4 5
def : 4 0 3 6 7 2 3 4 5 6 7 8 6
ghi : 6 4 0 4 9 9 9 9 9 9 9 9 7
jkl : 4 5 6 0 2 3 7 8 6 9 2 8 3
mno : 1 2 3 4 0 9 8 7 6 5 3 2 2
pqr : 9 8 3 4 1 0 9 8 3 5 7 9 2
stu : 1 8 9 4 2 1 0 9 8 7 2 1 1
vwx : 3 2 1 9 4 1 5 0 9 8 2 5 8
yza : 1 9 8 2 3 7 4 6 0 1 4 2 6
bcd : 8 9 1 4 6 2 4 2 1 0 9 3 4
efg : 7 7 7 7 8 9 1 2 3 9 0 4 3
hij : 6 1 2 4 9 0 2 1 3 9 1 0 8
klm : 1 6 3 8 3 5 9 4 7 2 1 5 0
当前代码:
import time
import os
import threading
import sys
from itertools import permutations
from functools import reduce
inputFile = 'Input.txt'
outputFile = 'output2.txt'
f=open(inputFile,'r')
line=f.readline()
buildings=[]
timings=[]
results={}
def run_me(TimeMatrix,combination,results,buildingDict):
my_lock.acquire()
results[' '.join(map(str, combination))] = GenerateTiming(TimeMatrix,combination,buildingDict)
my_lock.release()
def GenerateTiming(TimeMatrix,combination,buildingDict):
current=combination
mySum=[]
for i in range(len(current)-1):
currentBuilding=buildingDict[current[i]]
nextBuilding=buildingDict[current[i+1]]
mySum.append(TimeMatrix[currentBuilding-1][nextBuilding])
result=sum(mySum)
return(result)
while line:
b=line.split(":")[0]
t=line.split(":")[1]
b=b.strip()
t=t.strip()
buildings.append(b)
timings.append(t)
home=buildings[0]
line=f.readline()
combinations=[]
first, *rest = buildings
for p in permutations(rest):
combinations.append([first,*p,first])
bldLKP=combinations[0]
buildingDict={}
for i in range(1,len(bldLKP)):
buildingDict[bldLKP[i-1]] = i
i=i+1
TimeMatrix=[[i] + [int(n) for n in s.split()] for i, s in enumerate(timings, 1)]
#Threading Section
my_lock=threading.Lock()
my_threads=list()
for comb in combinations:
my_threads.append(threading.Thread(target=run_me,args=(TimeMatrix,comb,results,buildingDict)))
for current_thread in my_threads:
current_thread.start()
for current_thread in my_threads:
current_thread.join()
lowest=min(results.values())
final=[key for key in results if results[key]==lowest]
print(' '.join(map(str, final)),lowest)
编辑:我应该提到我认为问题出在以下代码中,我在其中识别建筑物的所有可能组合。但是,我不确定如何以其他方式做到这一点,因为需要检查每条路径的最快路线。
combinations=[]
first, *rest = buildings
for p in permutations(rest):
combinations.append([first,*p,first])
在您的代码中创建排列,然后 运行 线程计算每条路线的总和(时间)。您的代码线程数量 运行 是
小子集(4 栋建筑)
您为其余建筑物(不包括第一个建筑物)创建排列,因此数量将为 (4-1)! = 3 * 2 * 1 = 6
完整数据(13 栋建筑) (13-1)! = 479001600(应该创建这样数量的线程。
我建议在这种情况下不要使用线程。
我编写了简单的递归函数来实现您的需要。我对排列有很大的性能改进。如果当前时间大于最小时间,它不会更深。请看看我的实现
import threading
time_matrix = {}
buildings = []
with open('input.txt', 'r') as f:
lines = []
for row in f.readlines():
building, line = row.split(':')
building = building.strip()
buildings.append(building)
lines.append(line.strip())
time_matrix[building] = {}
for building, line in zip(buildings, lines):
for index, time_to_reach in enumerate(line.split(' ')):
to_building = buildings[index]
time_matrix[building][to_building] = int(time_to_reach)
first, *rest = buildings
results = []
class MyThread(threading.Thread):
def __init__(self, time_matrix, current_building, to_visit_buildings, current_path, current_time):
super().__init__()
self.time_matrix = time_matrix
self.current_building = current_building
self.to_visit_buildings = to_visit_buildings
self.current_path = current_path
self.current_time = current_time
def run(self):
min_time, min_paths = self.calculate(self.time_matrix, self.current_building, self.to_visit_buildings, self.current_path, self.current_time)
if min_paths and min_time:
results.append((min_time, min_paths))
def calculate(self, time_matrix, current_building, to_visit_buildings, current_path, current_time, min_time=None, min_paths=None):
if min_paths and min_time < current_time:
return None, None
if not to_visit_buildings:
current_time += time_matrix[current_building][first]
if min_time is None or min_time > current_time:
path = [first, *current_path, first]
if min_time == current_time:
return current_time, min_paths + [path]
else:
return current_time, [path]
for building in to_visit_buildings:
new_to_visit_buildings = [b for b in to_visit_buildings if b != building]
new_current_path = [*current_path, building]
new_current_time = current_time + time_matrix[current_building][building]
new_min_time, new_min_paths = self.calculate(time_matrix, building, new_to_visit_buildings, new_current_path, new_current_time, min_time, min_paths)
if new_min_paths and new_min_time and (not min_time or new_min_time < min_time):
min_time = new_min_time
min_paths = new_min_paths
return min_time, min_paths
my_threads = []
for building in rest:
to_visit = [b for b in rest if b != building]
current_time = time_matrix[first][building]
my_threads.append(MyThread(time_matrix, building, to_visit, [building], current_time))
for current_thread in my_threads:
current_thread.start()
for current_thread in my_threads:
current_thread.join()
min_paths, min_time = min(results, key=lambda r: r[0])
print(min_paths, min_time)
对于它输出的完整数据: ['abc', 'yza', 'bcd', 'ghi', 'jkl', 'efg', 'stu', 'hij', 'vwx', 'def', 'pqr', 'mno', 'klm', 'abc'] 20