multiprocessing.Pool 返回的输出长度与输入 iterable 的长度不同
multiprocessing.Pool returns different length of output compared to the input iterable
我写了一个 Python 程序,想在调用该程序时(即 MyProgram.__call__())使用 multiprocessing.Pool 进行并行化。预期输出是一个字典列表(dicts),其长度应与输入列表 images 相同。但是,当我用 20 个 CPU 的 multiprocessing.Pool 对长度为 60 的输入进行测试时,得到的输出长度只有 41。
下面是我的代码:
from acat.utilities import neighbor_shell_list, get_adj_matrix, get_max_delta_sum_path
from acat.build.adlayer import StochasticPatternGenerator as SPG
from acat.build.ordering import RandomOrderingGenerator as ROG
from ase.build import fcc111
from ase.io import read
from multiprocessing import Pool
import networkx as nx
import numpy as np
import os
class MyProgram(object):
    """Compute one label-count dictionary per structure, in parallel.

    Calling an instance maps :meth:`get_dict` over a list of structures
    with a ``multiprocessing.Pool`` of ``n_jobs`` worker processes.
    """

    def __init__(self, alpha=.75, n_jobs=os.cpu_count()):
        """Store the settings used by the other methods.

        Args:
            alpha: Amount added to the count of each neighbor-aware label
                in :meth:`get_dict`.
            n_jobs: Number of worker processes passed to ``Pool``. The
                default is evaluated once, when the class is defined.
        """
        self.alpha = alpha
        self.n_jobs = n_jobs

    def __call__(self, images):
        """Apply :meth:`get_dict` to every item of ``images`` in a pool.

        Args:
            images: List of structures, each accepted by :meth:`get_dict`.

        Returns:
            The list produced by ``pool.map(self.get_dict, images)``.
        """
        # Parallelization
        # NOTE: the pool is created without a context manager and is not
        # closed or joined anywhere in this method.
        pool = Pool(self.n_jobs)
        dicts = pool.map(self.get_dict, images)
        return dicts

    def get_dict(self, atoms):
        """Count element labels and neighbor-aware labels for one structure.

        For every row ``i`` of the adjacency matrix two labels are counted:
        the plain label ``str(numbers[i])`` (weight 1) and an extended label
        ``"<number>:<n1>,<n2>,..."`` (weight ``self.alpha``) built from the
        numbers of the neighbors of ``i``.

        Args:
            atoms: Structure exposing a ``numbers`` array and accepted by
                ``neighbor_shell_list`` (presumably an ``ase.Atoms`` --
                confirm against the caller).

        Returns:
            dict mapping each label string to its accumulated weight.
        """
        d = {}
        numbers = atoms.numbers
        nblist = neighbor_shell_list(atoms, dx=0.3, neighbor_number=1, mic=True)
        A = get_adj_matrix(nblist)
        for i in range(len(A)):
            # Column indices where row i equals 1, and the adjacency matrix
            # restricted to those indices.
            nbrs = np.where(A[i] == 1)[0]
            An = A[nbrs, :][:, nbrs]
            Gn = nx.from_numpy_matrix(An)
            # Longest simple path from node 0 to the first neighbor that
            # Gn reports for node 0; nodes of Gn are positions in nbrs.
            path = max(
                nx.all_simple_paths(Gn, source=0, target=next(Gn.neighbors(0))),
                key=len,
            )
            path_numbers = list(numbers[nbrs[path]])
            sorted_numbers = get_max_delta_sum_path(path_numbers)
            lab1 = str(numbers[i])
            lab2 = lab1 + ':' + ','.join(map(str, sorted_numbers))
            # The plain label counts 1, the extended label counts alpha.
            for lab, factor in ((lab1, 1), (lab2, self.alpha)):
                d[lab] = d.get(lab, 0) + factor
        return d
if __name__ == '__main__':
    MP = MyProgram(alpha=.75, n_jobs=20)

    # Build the fcc111 Pt slab and center it along axis 2 with vacuum=5.
    slab = fcc111('Pt', (4, 4, 4))
    slab.center(vacuum=5., axis=2)

    # Generate Ni/Pt orderings of the slab, then load every frame of
    # orderings.traj (presumably written by rog.run -- confirm).
    rog = ROG(slab, elements=['Ni', 'Pt'])
    rog.run(num_gen=10)
    slabs = read('orderings.traj', index=':')

    # Generate adsorbate patterns on those slabs, then load every frame of
    # patterns.traj (presumably written by spg.run -- confirm).
    spg = SPG(slabs, surface='fcc111',
              adsorbate_species=['CO', 'OH', 'C'],
              min_adsorbate_distance=3.,
              composition_effect=True)
    spg.run(num_gen=60, action='add', unique=False)
    images = read('patterns.traj', index=':')

    # Run the parallel computation and compare input and output lengths.
    dicts = MP(images)
    print(len(images))
    print(len(dicts))
输出
60
41
有谁知道为什么 multiprocessing.Pool 返回的输出长度与输入长度不同?遗憾的是,我无法用简化后的代码重现这一现象。不过,如果有人想运行我的代码,只需通过 pip3 install acat 安装 acat 即可。提前致谢。
请尝试将 __call__ 更改为:
def __call__(self, images):
    # Managing the pool with a context manager cleans up its worker
    # processes when the block exits; map() has returned by then.
    with Pool(self.n_jobs) as pool:
        dicts = pool.map(self.get_dict, images)
    return dicts
我怀疑问题在于 __call__ 在所有作业完成之前就返回了。len 可能以某种方式只统计了已完成的作业,而不是全部作业。
我写了一个 Python 程序,想在调用该程序时(即 MyProgram.__call__())使用 multiprocessing.Pool 进行并行化。预期输出是一个字典列表(dicts),其长度应与输入列表 images 相同。但是,当我用 20 个 CPU 的 multiprocessing.Pool 对长度为 60 的输入进行测试时,得到的输出长度只有 41。
下面是我的代码:
from acat.utilities import neighbor_shell_list, get_adj_matrix, get_max_delta_sum_path
from acat.build.adlayer import StochasticPatternGenerator as SPG
from acat.build.ordering import RandomOrderingGenerator as ROG
from ase.build import fcc111
from ase.io import read
from multiprocessing import Pool
import networkx as nx
import numpy as np
import os
class MyProgram(object):
    """Compute one label-count dictionary per structure, in parallel.

    Calling an instance maps :meth:`get_dict` over a list of structures
    with a ``multiprocessing.Pool`` of ``n_jobs`` worker processes.
    """

    def __init__(self, alpha=.75, n_jobs=os.cpu_count()):
        """Store the settings used by the other methods.

        Args:
            alpha: Amount added to the count of each neighbor-aware label
                in :meth:`get_dict`.
            n_jobs: Number of worker processes passed to ``Pool``. The
                default is evaluated once, when the class is defined.
        """
        self.alpha = alpha
        self.n_jobs = n_jobs

    def __call__(self, images):
        """Apply :meth:`get_dict` to every item of ``images`` in a pool.

        Args:
            images: List of structures, each accepted by :meth:`get_dict`.

        Returns:
            The list produced by ``pool.map(self.get_dict, images)``.
        """
        # Parallelization
        # NOTE: the pool is created without a context manager and is not
        # closed or joined anywhere in this method.
        pool = Pool(self.n_jobs)
        dicts = pool.map(self.get_dict, images)
        return dicts

    def get_dict(self, atoms):
        """Count element labels and neighbor-aware labels for one structure.

        For every row ``i`` of the adjacency matrix two labels are counted:
        the plain label ``str(numbers[i])`` (weight 1) and an extended label
        ``"<number>:<n1>,<n2>,..."`` (weight ``self.alpha``) built from the
        numbers of the neighbors of ``i``.

        Args:
            atoms: Structure exposing a ``numbers`` array and accepted by
                ``neighbor_shell_list`` (presumably an ``ase.Atoms`` --
                confirm against the caller).

        Returns:
            dict mapping each label string to its accumulated weight.
        """
        d = {}
        numbers = atoms.numbers
        nblist = neighbor_shell_list(atoms, dx=0.3, neighbor_number=1, mic=True)
        A = get_adj_matrix(nblist)
        for i in range(len(A)):
            # Column indices where row i equals 1, and the adjacency matrix
            # restricted to those indices.
            nbrs = np.where(A[i] == 1)[0]
            An = A[nbrs, :][:, nbrs]
            Gn = nx.from_numpy_matrix(An)
            # Longest simple path from node 0 to the first neighbor that
            # Gn reports for node 0; nodes of Gn are positions in nbrs.
            path = max(
                nx.all_simple_paths(Gn, source=0, target=next(Gn.neighbors(0))),
                key=len,
            )
            path_numbers = list(numbers[nbrs[path]])
            sorted_numbers = get_max_delta_sum_path(path_numbers)
            lab1 = str(numbers[i])
            lab2 = lab1 + ':' + ','.join(map(str, sorted_numbers))
            # The plain label counts 1, the extended label counts alpha.
            for lab, factor in ((lab1, 1), (lab2, self.alpha)):
                d[lab] = d.get(lab, 0) + factor
        return d
if __name__ == '__main__':
    MP = MyProgram(alpha=.75, n_jobs=20)

    # Build the fcc111 Pt slab and center it along axis 2 with vacuum=5.
    slab = fcc111('Pt', (4, 4, 4))
    slab.center(vacuum=5., axis=2)

    # Generate Ni/Pt orderings of the slab, then load every frame of
    # orderings.traj (presumably written by rog.run -- confirm).
    rog = ROG(slab, elements=['Ni', 'Pt'])
    rog.run(num_gen=10)
    slabs = read('orderings.traj', index=':')

    # Generate adsorbate patterns on those slabs, then load every frame of
    # patterns.traj (presumably written by spg.run -- confirm).
    spg = SPG(slabs, surface='fcc111',
              adsorbate_species=['CO', 'OH', 'C'],
              min_adsorbate_distance=3.,
              composition_effect=True)
    spg.run(num_gen=60, action='add', unique=False)
    images = read('patterns.traj', index=':')

    # Run the parallel computation and compare input and output lengths.
    dicts = MP(images)
    print(len(images))
    print(len(dicts))
输出
60
41
有谁知道为什么 multiprocessing.Pool 返回的输出长度与输入长度不同?遗憾的是,我无法用简化后的代码重现这一现象。不过,如果有人想运行我的代码,只需通过 pip3 install acat 安装 acat 即可。提前致谢。
请尝试将 __call__ 更改为:
def __call__(self, images):
    # Managing the pool with a context manager cleans up its worker
    # processes when the block exits; map() has returned by then.
    with Pool(self.n_jobs) as pool:
        dicts = pool.map(self.get_dict, images)
    return dicts
我怀疑问题在于 __call__ 在所有作业完成之前就返回了。len 可能以某种方式只统计了已完成的作业,而不是全部作业。