如果我在 python 中使用多处理但我的函数没有 return 任何东西,我是否需要调用 get() ?
Do I need to call get() if I use multiprocessing in python but my function does not return anything?
我想利用 python 的多处理模块来并行化这个简单的例子:
import numpy as np
import h5py
import os
import matplotlib.pyplot as plt
from multiprocessing import Pool
def load_array(path, variable):
    """Load one dataset from an HDF5 file as a NumPy array.

    Args:
        path: Path of the file; it is opened read-only with ``h5py.File``.
        variable: Name passed to ``File.get`` to look the dataset up.

    Returns:
        ``np.array`` built from whatever ``File.get(variable)`` returns.

    Raises:
        FileNotFoundError: If opening or reading the file fails for any
            reason; the underlying error is chained as ``__cause__``.
    """
    try:
        # The context manager closes the file handle even when reading
        # fails; the array is built while the file is still open.
        with h5py.File(path, "r") as h5_file:
            return np.array(h5_file.get(variable))
    except Exception as err:
        # `except Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt and SystemExit propagate untouched.
        raise FileNotFoundError("Corrupted file: {}".format(path)) from err
def mat2img(rootdir, save_path, variable):
    """Render every file under ``rootdir`` as a ``.jpg`` pseudocolor image.

    Walks ``rootdir`` recursively, loads ``variable`` from each file with
    ``load_array``, draws the transposed array with ``plt.pcolormesh`` and
    saves it as ``<save_path>/<containing directory name>/<file>.jpg``.

    Args:
        rootdir: Directory tree to walk for input files.
        save_path: Root directory that receives the generated images.
        variable: Dataset name forwarded to ``load_array``.

    Raises:
        Any exception raised by ``load_array`` or by matplotlib propagates
        to the caller; the figure is closed first.
    """
    fig = plt.figure()
    print("Processing " + rootdir)
    try:
        for subdir, _dirs, files in os.walk(rootdir):
            if not files:
                continue
            # Images are grouped by the name of the directory they came from.
            out_dir = os.path.join(save_path, subdir.split(os.path.sep)[-1])
            # savefig does not create missing directories.
            os.makedirs(out_dir, exist_ok=True)
            for filename in files:
                arr = load_array(os.path.join(subdir, filename), variable).T
                # Remove all margins so the image contains only the data.
                fig.subplots_adjust(top=1, bottom=0, right=1, left=0)
                plt.pcolormesh(
                    np.arange(0, arr.shape[1]),
                    np.arange(0, arr.shape[0]),
                    arr,
                    cmap="jet",
                )
                plt.axis("off")
                plt.savefig(os.path.join(out_dir, filename + ".jpg"))
                # Reuse the same figure for the next file.
                plt.clf()
    finally:
        # Release the figure so it does not stay registered with pyplot.
        plt.close(fig)
if __name__ == '__main__':
    # Submit one mat2img job per input directory to a pool of three workers.
    with Pool(processes=3) as pool:
        # NOTE(review): the AsyncResult objects returned by apply_async are
        # discarded and nothing waits for the jobs before the `with` block
        # ends. Per the multiprocessing docs, leaving the block calls
        # pool.terminate(), so the jobs may never get to run.
        # NOTE(review): "\d" and "\s" are not valid escape sequences; raw
        # strings (r"O:\data1") would avoid the interpreter warning.
        pool.apply_async(mat2img, ("O:\data1", "O:\spectrograms", "spectrum"))
        pool.apply_async(mat2img, ("O:\data2", "O:\spectrograms", "spectrum"))
        pool.apply_async(mat2img, ("O:\data3", "O:\spectrograms", "spectrum"))
然而,这段代码没有任何效果,就好像 apply_async 没有调用任何函数一样。我在文档(documentation)中看到,每个 apply_async 的返回值都被赋给了某个变量 res。即使我的函数没有 return 任何东西,我也需要这样做吗?如果是这样,变量 res 里包含什么,我又该对什么调用 get()?我哪里弄错了?
您使用 apply_async 只是调度(提交)了作业,之后还必须等待它们完成。如果不等待,with 块一结束进程池就会被终止,作业甚至可能根本不会开始。
# When worker processes are spawned (e.g. on Windows), this must run under
# an `if __name__ == '__main__':` guard.
with Pool(processes=3) as pool:
    # Raw strings keep the backslashes of the Windows paths literal.
    pool.apply_async(mat2img, (r"O:\data1", r"O:\spectrograms", "spectrum"))
    pool.apply_async(mat2img, (r"O:\data2", r"O:\spectrograms", "spectrum"))
    pool.apply_async(mat2img, (r"O:\data3", r"O:\spectrograms", "spectrum"))
    pool.close()  # Do not accept any more jobs.
    # Pool.join() accepts no timeout argument; it blocks until all worker
    # processes have exited, i.e. until every submitted job is done.
    pool.join()
或者,您可以对每个作业调用 .get(),以确保每个作业都已完成:
# When worker processes are spawned (e.g. on Windows), this must run under
# an `if __name__ == '__main__':` guard.
with Pool(processes=3) as pool:
    # Schedule the jobs. Raw strings keep the backslashes of the Windows
    # paths literal.
    jobs = [
        pool.apply_async(mat2img, (src_dir, r"O:\spectrograms", "spectrum"))
        for src_dir in (r"O:\data1", r"O:\data2", r"O:\data3")
    ]
    # Wait for the jobs to complete. get() re-raises any exception raised
    # inside the worker, and raises multiprocessing.TimeoutError if a job
    # has not finished within the timeout (in seconds).
    for job in jobs:
        job.get(timeout=100)
正如 @AndreaCorbellini 正确指出的那样,如果您的作业没有 return 任何您关心的结果,您可以用 job.wait() 代替 job.get()。
我想利用 python 的多处理模块来并行化这个简单的例子:
import numpy as np
import h5py
import os
import matplotlib.pyplot as plt
from multiprocessing import Pool
def load_array(path, variable):
    """Load one dataset from an HDF5 file as a NumPy array.

    Args:
        path: Path of the file; it is opened read-only with ``h5py.File``.
        variable: Name passed to ``File.get`` to look the dataset up.

    Returns:
        ``np.array`` built from whatever ``File.get(variable)`` returns.

    Raises:
        FileNotFoundError: If opening or reading the file fails for any
            reason; the underlying error is chained as ``__cause__``.
    """
    try:
        # The context manager closes the file handle even when reading
        # fails; the array is built while the file is still open.
        with h5py.File(path, "r") as h5_file:
            return np.array(h5_file.get(variable))
    except Exception as err:
        # `except Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt and SystemExit propagate untouched.
        raise FileNotFoundError("Corrupted file: {}".format(path)) from err
def mat2img(rootdir, save_path, variable):
    """Render every file under ``rootdir`` as a ``.jpg`` pseudocolor image.

    Walks ``rootdir`` recursively, loads ``variable`` from each file with
    ``load_array``, draws the transposed array with ``plt.pcolormesh`` and
    saves it as ``<save_path>/<containing directory name>/<file>.jpg``.

    Args:
        rootdir: Directory tree to walk for input files.
        save_path: Root directory that receives the generated images.
        variable: Dataset name forwarded to ``load_array``.

    Raises:
        Any exception raised by ``load_array`` or by matplotlib propagates
        to the caller; the figure is closed first.
    """
    fig = plt.figure()
    print("Processing " + rootdir)
    try:
        for subdir, _dirs, files in os.walk(rootdir):
            if not files:
                continue
            # Images are grouped by the name of the directory they came from.
            out_dir = os.path.join(save_path, subdir.split(os.path.sep)[-1])
            # savefig does not create missing directories.
            os.makedirs(out_dir, exist_ok=True)
            for filename in files:
                arr = load_array(os.path.join(subdir, filename), variable).T
                # Remove all margins so the image contains only the data.
                fig.subplots_adjust(top=1, bottom=0, right=1, left=0)
                plt.pcolormesh(
                    np.arange(0, arr.shape[1]),
                    np.arange(0, arr.shape[0]),
                    arr,
                    cmap="jet",
                )
                plt.axis("off")
                plt.savefig(os.path.join(out_dir, filename + ".jpg"))
                # Reuse the same figure for the next file.
                plt.clf()
    finally:
        # Release the figure so it does not stay registered with pyplot.
        plt.close(fig)
if __name__ == '__main__':
    # Submit one mat2img job per input directory to a pool of three workers.
    with Pool(processes=3) as pool:
        # NOTE(review): the AsyncResult objects returned by apply_async are
        # discarded and nothing waits for the jobs before the `with` block
        # ends. Per the multiprocessing docs, leaving the block calls
        # pool.terminate(), so the jobs may never get to run.
        # NOTE(review): "\d" and "\s" are not valid escape sequences; raw
        # strings (r"O:\data1") would avoid the interpreter warning.
        pool.apply_async(mat2img, ("O:\data1", "O:\spectrograms", "spectrum"))
        pool.apply_async(mat2img, ("O:\data2", "O:\spectrograms", "spectrum"))
        pool.apply_async(mat2img, ("O:\data3", "O:\spectrograms", "spectrum"))
然而,这段代码没有任何效果,就好像 apply_async 没有调用任何函数一样。我在文档(documentation)中看到,每个 apply_async 的返回值都被赋给了某个变量 res。即使我的函数没有 return 任何东西,我也需要这样做吗?如果是这样,变量 res 里包含什么,我又该对什么调用 get()?我哪里弄错了?
您使用 apply_async 只是调度(提交)了作业,之后还必须等待它们完成。如果不等待,with 块一结束进程池就会被终止,作业甚至可能根本不会开始。
# When worker processes are spawned (e.g. on Windows), this must run under
# an `if __name__ == '__main__':` guard.
with Pool(processes=3) as pool:
    # Raw strings keep the backslashes of the Windows paths literal.
    pool.apply_async(mat2img, (r"O:\data1", r"O:\spectrograms", "spectrum"))
    pool.apply_async(mat2img, (r"O:\data2", r"O:\spectrograms", "spectrum"))
    pool.apply_async(mat2img, (r"O:\data3", r"O:\spectrograms", "spectrum"))
    pool.close()  # Do not accept any more jobs.
    # Pool.join() accepts no timeout argument; it blocks until all worker
    # processes have exited, i.e. until every submitted job is done.
    pool.join()
或者,您可以对每个作业调用 .get(),以确保每个作业都已完成:
# When worker processes are spawned (e.g. on Windows), this must run under
# an `if __name__ == '__main__':` guard.
with Pool(processes=3) as pool:
    # Schedule the jobs. Raw strings keep the backslashes of the Windows
    # paths literal.
    jobs = [
        pool.apply_async(mat2img, (src_dir, r"O:\spectrograms", "spectrum"))
        for src_dir in (r"O:\data1", r"O:\data2", r"O:\data3")
    ]
    # Wait for the jobs to complete. get() re-raises any exception raised
    # inside the worker, and raises multiprocessing.TimeoutError if a job
    # has not finished within the timeout (in seconds).
    for job in jobs:
        job.get(timeout=100)
正如 @AndreaCorbellini 正确指出的那样,如果您的作业没有 return 任何您关心的结果,您可以用 job.wait() 代替 job.get()。