在 Python 中使用多进程(multiprocessing)删除文件
Deleting files with multiprocessing in python
我正在使用以下 Python 代码删除大量文件:
import os
from multiprocessing import Pool
def deleteFiles(loc):
    """Try to delete the files found under ``loc`` with 5 worker processes.

    NOTE: this is the version from the question and it fails on purpose.
    ``Fn_deleteFiles`` is defined inside this function, so ``Pool.map``
    cannot pickle it and raises "Can't pickle local object".
    """
    def Fn_deleteFiles(inp):
        # Each work item is a dict: 'fn' is the file name, 'loc' its directory.
        [fn, loc] = [inp['fn'], inp['loc']]
        os.remove(os.path.join(loc, fn))

    p = Pool(5)
    for path, subdirs, files in os.walk(loc):
        if len(files) > 0:
            inpData = [{'fn':x, 'loc':loc} for x in files]
            p.map(Fn_deleteFiles, inpData)
    p.close()
if __name__ == '__main__':
    # Run only when executed as a script, not when imported.
    loc = r'C:\myDriveWithFilesToDelete'
    deleteFiles(loc)
我收到以下错误:
File "C:\Program Files\Python 3.5\lib\multiprocessing\reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
AttributeError: Can't pickle local object 'deleteFiles.<locals>.Fn_deleteFiles'
问题在于您是在一个函数内部定义了另一个函数。
函数 Fn_deleteFiles(inp) 是在 deleteFiles(loc) 内部定义的。
这意味着 Fn_deleteFiles(inp) 只有在 deleteFiles(loc) 运行时才会被创建。
问题是,在内部,multiprocessing.pool.Pool() 会调用 pickle 库,把函数对象从当前 Python 进程传输到新生成的 Python 进程。
然而,如果 pickle 无法定位函数的定义位置,它就无法序列化该函数。
下面的示例演示了类似的错误。
import pickle
def foo():
    """Return a function that is defined inside ``foo`` (a local object)."""
    def bar():
        return "Hello"
    return bar
# ``bar`` is the inner function returned by foo(); pickling it fails.
bar = foo()
if __name__ == '__main__':
    s = pickle.dumps(bar)
会导致同样的错误:
Traceback (most recent call last):
File ".../stacktest.py", line 10, in <module>
s = pickle.dumps(bar)
AttributeError: Can't pickle local object 'foo.<locals>.bar'
因此,要修复此错误,您可以改用 multiprocessing.pool.ThreadPool,因为它不会对函数进行 pickle 序列化。
import os
from multiprocessing.pool import ThreadPool as Pool
def deleteFiles(loc):
    """Delete every file under ``loc`` (recursively) using 5 worker threads.

    ``Pool`` here is ``multiprocessing.pool.ThreadPool``: the work runs in
    threads of the same process, so the nested worker function never has
    to be pickled.

    Only files are removed; the directories themselves are left in place.
    """
    def Fn_deleteFiles(inp):
        # Each work item is a dict: 'fn' is the file name, 'loc' its directory.
        os.remove(os.path.join(inp['loc'], inp['fn']))

    p = Pool(5)
    try:
        for path, subdirs, files in os.walk(loc):
            if files:
                # Use ``path`` (the directory currently being walked), not the
                # root ``loc``; otherwise files in subdirectories are looked
                # up in the wrong directory and os.remove fails.
                inpData = [{'fn': x, 'loc': path} for x in files]
                p.map(Fn_deleteFiles, inpData)
    finally:
        # Release the workers even if a deletion raises.
        p.close()
        p.join()
if __name__ == '__main__':
    # Run only when executed as a script, not when imported.
    loc = 'DriveWithFilesToDelete'
    deleteFiles(loc)
或者,您可以在 deleteFiles(loc)
之外定义 Fn_deleteFiles(inp)
来解决这个问题。
警告:由于我不清楚的原因,下面这段代码在 IDLE 解释器中运行时会挂起。
import os
from multiprocessing import Pool
def Fn_deleteFiles(inp):
    """Delete the single file described by ``inp``.

    Defined at module level so that ``multiprocessing.Pool`` can pickle a
    reference to it and hand it to the worker processes.

    ``inp`` is a dict with two keys: 'fn' (the file name) and 'loc' (the
    directory that contains the file).
    """
    print("Delete", inp)
    [fn, loc] = [inp['fn'], inp['loc']]
    os.remove(os.path.join(loc, fn))
def deleteFiles(loc):
    """Delete every file under ``loc`` (recursively) using 5 worker processes.

    The per-file work is done by the module-level ``Fn_deleteFiles``, which
    the pool can pickle because it is not nested inside another function.

    Only files are removed; the directories themselves are left in place.
    """
    p = Pool(5)
    try:
        for path, subdirs, files in os.walk(loc):
            if files:
                # Use ``path`` (the directory currently being walked), not the
                # root ``loc``; otherwise files in subdirectories are looked
                # up in the wrong directory and os.remove fails.
                inpData = [{'fn': x, 'loc': path} for x in files]
                p.map(Fn_deleteFiles, inpData)
    finally:
        # Release the workers even if a deletion raises.
        p.close()
        p.join()
if __name__ == '__main__':
    # Run only when executed as a script, not when imported.
    loc = 'DriveWithFilesToDelete'
    deleteFiles(loc)
我正在使用以下 Python 代码删除大量文件:
import os
from multiprocessing import Pool
def deleteFiles(loc):
    """Try to delete the files found under ``loc`` with 5 worker processes.

    NOTE: this is the version from the question and it fails on purpose.
    ``Fn_deleteFiles`` is defined inside this function, so ``Pool.map``
    cannot pickle it and raises "Can't pickle local object".
    """
    def Fn_deleteFiles(inp):
        # Each work item is a dict: 'fn' is the file name, 'loc' its directory.
        [fn, loc] = [inp['fn'], inp['loc']]
        os.remove(os.path.join(loc, fn))

    p = Pool(5)
    for path, subdirs, files in os.walk(loc):
        if len(files) > 0:
            inpData = [{'fn':x, 'loc':loc} for x in files]
            p.map(Fn_deleteFiles, inpData)
    p.close()
if __name__ == '__main__':
    # Run only when executed as a script, not when imported.
    loc = r'C:\myDriveWithFilesToDelete'
    deleteFiles(loc)
我收到以下错误:
File "C:\Program Files\Python 3.5\lib\multiprocessing\reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
AttributeError: Can't pickle local object 'deleteFiles.<locals>.Fn_deleteFiles'
问题在于您是在一个函数内部定义了另一个函数。
函数 Fn_deleteFiles(inp) 是在 deleteFiles(loc) 内部定义的。
这意味着 Fn_deleteFiles(inp) 只有在 deleteFiles(loc) 运行时才会被创建。
问题是,在内部,multiprocessing.pool.Pool() 会调用 pickle 库,把函数对象从当前 Python 进程传输到新生成的 Python 进程。
然而,如果 pickle 无法定位函数的定义位置,它就无法序列化该函数。
下面的示例演示了类似的错误。
import pickle
def foo():
    """Return a function that is defined inside ``foo`` (a local object)."""
    def bar():
        return "Hello"
    return bar
# ``bar`` is the inner function returned by foo(); pickling it fails.
bar = foo()
if __name__ == '__main__':
    s = pickle.dumps(bar)
会导致同样的错误:
Traceback (most recent call last):
File ".../stacktest.py", line 10, in <module>
s = pickle.dumps(bar)
AttributeError: Can't pickle local object 'foo.<locals>.bar'
因此,要修复此错误,您可以改用 multiprocessing.pool.ThreadPool,因为它不会对函数进行 pickle 序列化。
import os
from multiprocessing.pool import ThreadPool as Pool
def deleteFiles(loc):
    """Delete every file under ``loc`` (recursively) using 5 worker threads.

    ``Pool`` here is ``multiprocessing.pool.ThreadPool``: the work runs in
    threads of the same process, so the nested worker function never has
    to be pickled.

    Only files are removed; the directories themselves are left in place.
    """
    def Fn_deleteFiles(inp):
        # Each work item is a dict: 'fn' is the file name, 'loc' its directory.
        os.remove(os.path.join(inp['loc'], inp['fn']))

    p = Pool(5)
    try:
        for path, subdirs, files in os.walk(loc):
            if files:
                # Use ``path`` (the directory currently being walked), not the
                # root ``loc``; otherwise files in subdirectories are looked
                # up in the wrong directory and os.remove fails.
                inpData = [{'fn': x, 'loc': path} for x in files]
                p.map(Fn_deleteFiles, inpData)
    finally:
        # Release the workers even if a deletion raises.
        p.close()
        p.join()
if __name__ == '__main__':
    # Run only when executed as a script, not when imported.
    loc = 'DriveWithFilesToDelete'
    deleteFiles(loc)
或者,您可以在 deleteFiles(loc)
之外定义 Fn_deleteFiles(inp)
来解决这个问题。
警告:由于我不清楚的原因,下面这段代码在 IDLE 解释器中运行时会挂起。
import os
from multiprocessing import Pool
def Fn_deleteFiles(inp):
    """Delete the single file described by ``inp``.

    Defined at module level so that ``multiprocessing.Pool`` can pickle a
    reference to it and hand it to the worker processes.

    ``inp`` is a dict with two keys: 'fn' (the file name) and 'loc' (the
    directory that contains the file).
    """
    print("Delete", inp)
    [fn, loc] = [inp['fn'], inp['loc']]
    os.remove(os.path.join(loc, fn))
def deleteFiles(loc):
    """Delete every file under ``loc`` (recursively) using 5 worker processes.

    The per-file work is done by the module-level ``Fn_deleteFiles``, which
    the pool can pickle because it is not nested inside another function.

    Only files are removed; the directories themselves are left in place.
    """
    p = Pool(5)
    try:
        for path, subdirs, files in os.walk(loc):
            if files:
                # Use ``path`` (the directory currently being walked), not the
                # root ``loc``; otherwise files in subdirectories are looked
                # up in the wrong directory and os.remove fails.
                inpData = [{'fn': x, 'loc': path} for x in files]
                p.map(Fn_deleteFiles, inpData)
    finally:
        # Release the workers even if a deletion raises.
        p.close()
        p.join()
if __name__ == '__main__':
    # Run only when executed as a script, not when imported.
    loc = 'DriveWithFilesToDelete'
    deleteFiles(loc)