派生自 collections.Counter 的 class 在 pickle 时丢失属性值
Class derived from collections.Counter loses values when pickled
我想创建一个像计数器一样工作的 class,但有一些额外的功能。这是精简版:
from collections import Counter
import pickle
class DerivedCounter(Counter):
    """Counter subclass that additionally stores a ``_capacity`` value.

    NOTE: this is the version that exhibits the problem under discussion.
    A pickle round trip rebuilds the object by calling ``__init__`` again
    with only the stored counts, so ``_capacity`` comes back as the default
    (10) instead of the value chosen at construction time.
    """

    def __init__(self, *args, capacity: int = 10):
        """Initialise the counts and remember ``capacity``.

        Args:
            *args: Positional arguments forwarded unchanged to ``Counter``.
            capacity: Keyword-only value stored in ``self._capacity``.
        """
        super().__init__(*args)
        self._capacity = capacity
# Build a counter with a non-default capacity, then show the attribute
# before and after a pickle round trip.
dc = DerivedCounter(capacity=200)
print("Original", dc._capacity)
round_tripped = pickle.loads(pickle.dumps(dc))
print("Pickled", round_tripped._capacity)
使用 Dask 时,此对象会被 pickled 和 unpickled。不幸的是,_capacity 的值在途中丢失了,输出为:
Original 200
Pickled 10
好像是用默认值而不是创建对象时选择的值!当我从 dict 而不是 Counter 继承时,我确实得到了这个:
Original 200
Pickled 200
那么 Counter 有什么奇怪的地方,我该如何解决这个问题?
祝好,Boris
您 pickle 的是整个 class 的实例。在 unpickle 时,它会被重新调用一次(也就是再次执行 __init__)。
from collections import Counter
import pickle
class DerivedCounter(Counter):
    """Counter subclass with a ``_capacity`` attribute and a tracing print.

    The print in ``__init__`` makes it visible that unpickling calls
    ``__init__`` a second time, with the counts as the only argument and
    ``capacity`` left at its default.
    """

    def __init__(self, *args, capacity=10):
        """Print the received arguments, then initialise like ``Counter``.

        Args:
            *args: Positional arguments forwarded unchanged to ``Counter``.
            capacity: Keyword-only value stored in ``self._capacity``.
        """
        print('dc', args, capacity)  # trace every construction
        Counter.__init__(self, *args)
        self._capacity = capacity
# Round-trip one instance; the print inside __init__ shows how unpickling
# re-creates the object.
dc = DerivedCounter(capacity=200)
payload = pickle.dumps(dc)
print(pickle.loads(payload)._capacity)
# Output:
#>>> dc () 200
#>>> dc ({},) 10
#>>> 10
也许只 pickle dc.__dict__ 就足够了。
from collections import Counter
import pickle
class Pickler:
    """Mixin that saves and restores an instance's ``__dict__`` via pickle."""

    def __init__(self, pickled=None):
        """Optionally restore state from ``pickled``.

        Args:
            pickled: Bytes as produced by the ``pickled`` property. Any falsy
                value (including ``None``) leaves the instance untouched.
        """
        if pickled:
            self.cucumbered(pickled)

    @property
    def pickled(self):
        """Return the instance ``__dict__`` serialised with ``pickle.dumps``."""
        return pickle.dumps(self.__dict__)

    def cucumbered(self, pickled):
        """Replace ``__dict__`` with the unpickled state and return ``self``.

        NOTE: ``pickle.loads`` must only ever be fed trusted data.
        """
        self.__dict__ = pickle.loads(pickled)
        return self
class DerivedCounter(Counter, Pickler):
    """Counter with a ``_capacity`` attribute restorable from pickled state."""

    def __init__(self, *args, capacity=10, pickled=None):
        """Initialise the counts, then either restore or set the capacity.

        Args:
            *args: Positional arguments forwarded unchanged to ``Counter``.
            capacity: Stored in ``self._capacity`` when ``pickled`` is falsy.
            pickled: Pickled ``__dict__`` bytes passed on to ``Pickler``;
                when truthy, ``capacity`` is ignored.
        """
        Counter.__init__(self, *args)
        Pickler.__init__(self, pickled)
        if not pickled:
            self._capacity = capacity
dc = DerivedCounter(capacity=200)
# Restore the instance's own state in place from its pickled __dict__.
snapshot = dc.pickled
print(dc.cucumbered(snapshot)._capacity)  # 200
# Build a second instance directly from the pickled state.
dc2 = DerivedCounter(pickled=dc.pickled)
print(dc2._capacity)  # 200
你可以使用 copyreg(Python 标准库内置模块)来做你想做的事。
它会复制您所创建实例中的全部状态。
import copyreg
import pickle
from collections import Counter


class DerivedCounter(Counter):
    """Counter subclass whose first positional argument is a capacity."""

    def __init__(self, capacity: int = 10, *args):
        """Store ``capacity`` and forward the remaining arguments to ``Counter``."""
        print('init')
        super().__init__(*args)
        self._capacity = capacity


def pickle_dc(d):
    """Reduction function for ``copyreg``: return ``(callable, args)``.

    The args tuple carries the capacity and a plain-dict copy of the counts
    of the instance being pickled, so that unpickling calls
    ``DerivedCounter(capacity, counts)``.
    """
    print("pickling a DerivedCounter instance...")
    # Read the counts from the argument ``d``. The earlier version read the
    # module-level ``dc`` here, which pickled the wrong counts for any
    # instance other than ``dc`` itself.
    return DerivedCounter, (d._capacity, {key: value for key, value in d.items()})


# Register the reduction function so pickle uses it for DerivedCounter.
copyreg.pickle(DerivedCounter, pickle_dc)

dc = DerivedCounter(200, {'a':10})
print(dc._capacity)
a = pickle.dumps(dc)
print(pickle.loads(a)._capacity)
c = pickle.loads(a)
在 pickle 文档中我们可以看到,默认情况下 unpickle 时不会调用 __init__
(参见 pickle 官方文档 “Pickling Class Instances” 一节)。
@jasonharper,非常感谢您在评论中的解释!Counter 的 __reduce__ 函数看起来确实有点特别:例如,函数 __getstate__、__setstate__、__getnewargs__ 和 __getnewargs_ex__ 都不会被调用。因此,重新定义 __reduce__ 似乎是可行的方法。
这是您所指的重新定义:
https://github.com/python/cpython/blob/3.7/Lib/collections/__init__.py#L697
我不清楚为什么 Counter 需要这个。事实上,把它的 __reduce__ 方法恢复为 dict 的实现,似乎就解决了我的问题:
from collections import Counter
import pickle
class DerivedCounter(Counter):
    """Counter subclass whose ``_capacity`` survives a pickle round trip."""

    def __init__(self, *args, capacity: int = 10):
        """Initialise the counts and remember ``capacity``.

        Args:
            *args: Positional arguments forwarded unchanged to ``Counter``.
            capacity: Keyword-only value stored in ``self._capacity``.
        """
        super().__init__(*args)
        self._capacity = capacity

    def __reduce__(self):
        """Pickle with ``dict``'s reduction instead of ``Counter``'s.

        ``Counter`` overrides ``__reduce__``; delegating to
        ``dict.__reduce__`` bypasses that override so the instance
        attributes (here ``_capacity``) are pickled along with the counts.
        """
        return dict.__reduce__(self)
# Same demonstration as before, now run against the class that overrides
# __reduce__.
dc = DerivedCounter(capacity=200)
print("Original", dc._capacity)
round_tripped = pickle.loads(pickle.dumps(dc))
print("Pickled", round_tripped._capacity)
现在输出是我所期望的:
Original 200
Pickled 200
而且恢复出来的对象作为计数器的功能似乎完好无损。所以,Counter 中对 __reduce__ 的重新定义也许已经过时了,甚至可能是有害的?
再次感谢!
我想创建一个像计数器一样工作的 class,但有一些额外的功能。这是精简版:
from collections import Counter
import pickle
class DerivedCounter(Counter):
    """Counter subclass that additionally stores a ``_capacity`` value.

    NOTE: this is the version that exhibits the problem under discussion.
    A pickle round trip rebuilds the object by calling ``__init__`` again
    with only the stored counts, so ``_capacity`` comes back as the default
    (10) instead of the value chosen at construction time.
    """

    def __init__(self, *args, capacity: int = 10):
        """Initialise the counts and remember ``capacity``.

        Args:
            *args: Positional arguments forwarded unchanged to ``Counter``.
            capacity: Keyword-only value stored in ``self._capacity``.
        """
        super().__init__(*args)
        self._capacity = capacity
# Build a counter with a non-default capacity, then show the attribute
# before and after a pickle round trip.
dc = DerivedCounter(capacity=200)
print("Original", dc._capacity)
round_tripped = pickle.loads(pickle.dumps(dc))
print("Pickled", round_tripped._capacity)
使用 Dask 时,此对象会被 pickled 和 unpickled。不幸的是,_capacity 的值在途中丢失了,输出为:
Original 200
Pickled 10
好像是用默认值而不是创建对象时选择的值!当我从 dict 而不是 Counter 继承时,我确实得到了这个:
Original 200
Pickled 200
那么 Counter 有什么奇怪的地方,我该如何解决这个问题?
祝好,Boris
您 pickle 的是整个 class 的实例。在 unpickle 时,它会被重新调用一次(也就是再次执行 __init__)。
from collections import Counter
import pickle
class DerivedCounter(Counter):
    """Counter subclass with a ``_capacity`` attribute and a tracing print.

    The print in ``__init__`` makes it visible that unpickling calls
    ``__init__`` a second time, with the counts as the only argument and
    ``capacity`` left at its default.
    """

    def __init__(self, *args, capacity=10):
        """Print the received arguments, then initialise like ``Counter``.

        Args:
            *args: Positional arguments forwarded unchanged to ``Counter``.
            capacity: Keyword-only value stored in ``self._capacity``.
        """
        print('dc', args, capacity)  # trace every construction
        Counter.__init__(self, *args)
        self._capacity = capacity
# Round-trip one instance; the print inside __init__ shows how unpickling
# re-creates the object.
dc = DerivedCounter(capacity=200)
payload = pickle.dumps(dc)
print(pickle.loads(payload)._capacity)
# Output:
#>>> dc () 200
#>>> dc ({},) 10
#>>> 10
也许只 pickle dc.__dict__ 就足够了。
from collections import Counter
import pickle
class Pickler:
    """Mixin that saves and restores an instance's ``__dict__`` via pickle."""

    def __init__(self, pickled=None):
        """Optionally restore state from ``pickled``.

        Args:
            pickled: Bytes as produced by the ``pickled`` property. Any falsy
                value (including ``None``) leaves the instance untouched.
        """
        if pickled:
            self.cucumbered(pickled)

    @property
    def pickled(self):
        """Return the instance ``__dict__`` serialised with ``pickle.dumps``."""
        return pickle.dumps(self.__dict__)

    def cucumbered(self, pickled):
        """Replace ``__dict__`` with the unpickled state and return ``self``.

        NOTE: ``pickle.loads`` must only ever be fed trusted data.
        """
        self.__dict__ = pickle.loads(pickled)
        return self
class DerivedCounter(Counter, Pickler):
    """Counter with a ``_capacity`` attribute restorable from pickled state."""

    def __init__(self, *args, capacity=10, pickled=None):
        """Initialise the counts, then either restore or set the capacity.

        Args:
            *args: Positional arguments forwarded unchanged to ``Counter``.
            capacity: Stored in ``self._capacity`` when ``pickled`` is falsy.
            pickled: Pickled ``__dict__`` bytes passed on to ``Pickler``;
                when truthy, ``capacity`` is ignored.
        """
        Counter.__init__(self, *args)
        Pickler.__init__(self, pickled)
        if not pickled:
            self._capacity = capacity
dc = DerivedCounter(capacity=200)
# Restore the instance's own state in place from its pickled __dict__.
snapshot = dc.pickled
print(dc.cucumbered(snapshot)._capacity)  # 200
# Build a second instance directly from the pickled state.
dc2 = DerivedCounter(pickled=dc.pickled)
print(dc2._capacity)  # 200
你可以使用 copyreg(Python 标准库内置模块)来做你想做的事。
它会复制您所创建实例中的全部状态。
import copyreg
import pickle
from collections import Counter


class DerivedCounter(Counter):
    """Counter subclass whose first positional argument is a capacity."""

    def __init__(self, capacity: int = 10, *args):
        """Store ``capacity`` and forward the remaining arguments to ``Counter``."""
        print('init')
        super().__init__(*args)
        self._capacity = capacity


def pickle_dc(d):
    """Reduction function for ``copyreg``: return ``(callable, args)``.

    The args tuple carries the capacity and a plain-dict copy of the counts
    of the instance being pickled, so that unpickling calls
    ``DerivedCounter(capacity, counts)``.
    """
    print("pickling a DerivedCounter instance...")
    # Read the counts from the argument ``d``. The earlier version read the
    # module-level ``dc`` here, which pickled the wrong counts for any
    # instance other than ``dc`` itself.
    return DerivedCounter, (d._capacity, {key: value for key, value in d.items()})


# Register the reduction function so pickle uses it for DerivedCounter.
copyreg.pickle(DerivedCounter, pickle_dc)

dc = DerivedCounter(200, {'a':10})
print(dc._capacity)
a = pickle.dumps(dc)
print(pickle.loads(a)._capacity)
c = pickle.loads(a)
在 pickle 文档中我们可以看到,默认情况下 unpickle 时不会调用 __init__
(参见 pickle 官方文档 “Pickling Class Instances” 一节)。
@jasonharper,非常感谢您在评论中的解释!Counter 的 __reduce__ 函数看起来确实有点特别:例如,函数 __getstate__、__setstate__、__getnewargs__ 和 __getnewargs_ex__ 都不会被调用。因此,重新定义 __reduce__ 似乎是可行的方法。
这是您所指的重新定义: https://github.com/python/cpython/blob/3.7/Lib/collections/__init__.py#L697
我不清楚为什么 Counter 需要这个。事实上,把它的 __reduce__ 方法恢复为 dict 的实现,似乎就解决了我的问题:
from collections import Counter
import pickle
class DerivedCounter(Counter):
    """Counter subclass whose ``_capacity`` survives a pickle round trip."""

    def __init__(self, *args, capacity: int = 10):
        """Initialise the counts and remember ``capacity``.

        Args:
            *args: Positional arguments forwarded unchanged to ``Counter``.
            capacity: Keyword-only value stored in ``self._capacity``.
        """
        super().__init__(*args)
        self._capacity = capacity

    def __reduce__(self):
        """Pickle with ``dict``'s reduction instead of ``Counter``'s.

        ``Counter`` overrides ``__reduce__``; delegating to
        ``dict.__reduce__`` bypasses that override so the instance
        attributes (here ``_capacity``) are pickled along with the counts.
        """
        return dict.__reduce__(self)
# Same demonstration as before, now run against the class that overrides
# __reduce__.
dc = DerivedCounter(capacity=200)
print("Original", dc._capacity)
round_tripped = pickle.loads(pickle.dumps(dc))
print("Pickled", round_tripped._capacity)
现在输出是我所期望的:
Original 200
Pickled 200
而且恢复出来的对象作为计数器的功能似乎完好无损。所以,Counter 中对 __reduce__ 的重新定义也许已经过时了,甚至可能是有害的?
再次感谢!