创建具有特殊设置功能的自定义计数器对象
Creating a custom Counter object with special set function
在另一个问题中,@AshwiniChaudhary 给出了一个很好的答案,用来创建一个 set() 函数(即 __setitem__)不同的新 Counter 对象:
from collections import Counter
class CustomCounter(Counter):
    '''
    Counter whose multi-character keys are stored with a trailing U+E000.
    '''

    def __setitem__(self, key, value):
        '''
        Store value under key, appending U+E000 to any key longer than one
        character that does not already end with it.
        '''
        # Keys of length 0 or 1 are stored unchanged.
        if len(key) > 1 and not key.endswith(u"\uE000"):
            key += u"\uE000"
        super(CustomCounter, self).__setitem__(key, value)
为了允许把用户定义的 char/str 附加到键(key)上,我尝试了:
from collections import Counter, defaultdict
class AppendedStrCounter(Counter):
    '''
    Counter that appends a caller-supplied suffix to keys and stores every
    key as a tuple.
    '''

    def __init__(self, str_to_append):
        '''
        Create an empty counter.

        str_to_append: suffix that __setitem__ appends to keys.
        '''
        self._appended_str = str_to_append
        # No iterable is forwarded here, so the counter always starts empty.
        super(AppendedStrCounter, self).__init__()

    def __setitem__(self, key, value):
        '''
        Store value under tuple(key), first appending the suffix to any key
        longer than one element that does not already end with it.
        '''
        if len(key) > 1 and not key.endswith(self._appended_str):
            key += self._appended_str
        super(AppendedStrCounter, self).__setitem__(tuple(key), value)
但是它返回一个空的计数器:
>>> class AppendedStrCounter(Counter):
...     def __init__(self, str_to_append):
...         self._appended_str = str_to_append
...         super(AppendedStrCounter, self).__init__()
...     def __setitem__(self, key, value):
...         if len(key) > 1 and not key.endswith(self._appended_str):
...             key += self._appended_str
...         super(AppendedStrCounter, self).__setitem__(tuple(key), value)
...
>>> AppendedStrCounter('foo bar bar blah'.split())
AppendedStrCounter()
那是因为我在 __init__() 中缺少 iter 参数:
from collections import Counter, defaultdict
class AppendedStrCounter(Counter):
    '''
    Counter that is filled from an iterable, appends a caller-supplied
    suffix to keys and stores every key as a tuple.

    NOTE: stores use the modified tuple key while lookups use the key as
    given, so items repeated in the iterable passed to __init__ each end up
    with a count of 1 instead of being accumulated.
    '''

    def __init__(self, iter, str_to_append):
        '''
        iter: iterable of items to count (forwarded to Counter.__init__).
        str_to_append: suffix that __setitem__ appends to keys.
        '''
        # The suffix is set first because filling the counter goes through
        # __setitem__, which reads it.
        self._appended_str = str_to_append
        super(AppendedStrCounter, self).__init__(iter)

    def __setitem__(self, key, value):
        '''
        Store value under tuple(key), first appending the suffix to any key
        longer than one element that does not already end with it.
        '''
        if len(key) > 1 and not key.endswith(self._appended_str):
            key += self._appended_str
        super(AppendedStrCounter, self).__setitem__(tuple(key), value)
[输出]:
>>> AppendedStrCounter('foo bar bar blah'.split(), u'\ue000')
AppendedStrCounter({('f', 'o', 'o', '\ue000'): 1, ('b', 'a', 'r', '\ue000'): 1, ('b', 'l', 'a', 'h', '\ue000'): 1})
但是 'bar' 的值是错误的,应该是 2 而不是 1。
在 __init__() 中使用 iter 是初始化 Counter 的正确方法吗?
正如在别处所指出的,collections.Counter 的文档并没有说明它的 __init__ 方法是*如何*添加键或设置值的,只说明它会这样做。
由于它没有被明确设计为可子类化,因此最明智的做法是不要将其子类化。
collections.abc 模块的存在,就是为了给 Python 内置类型提供易于子类化的抽象类,其中包括 dict(用 ABC 的术语来说就是 MutableMapping)。
所以,如果你只需要 "a Counter-like class"(而不是“一个能满足 isinstance 和 issubclass 等内置函数的 Counter 子类”),
您可以创建自己的 MutableMapping,让它内部持有一个 Counter,然后由它来 "middleman"(中转)初始化器以及 Counter 在典型 dict 之外添加的三个方法:
import collections
import collections.abc
def _identity(s):
    '''
    Default mutator function: return s unchanged.
    '''
    return s


class CustomCounter(collections.abc.MutableMapping):
    '''
    Counter-like mapping that passes every key through a mutator function.

    Overrides the 5 methods of a MutableMapping:
    __getitem__, __setitem__, __delitem__, __iter__, __len__
    ...and the 3 non-Mapping methods of Counter:
    elements, most_common, subtract

    __contains__ is overridden as well: the inherited version relies on
    __getitem__ raising KeyError, which never happens here because the
    wrapped Counter returns 0 for missing keys.
    '''

    def __init__(self, values=None, *, mutator=_identity):
        '''
        values: None, an iterable of items to count, or a mapping of
            item -> count.
        mutator: function applied to every key before it reaches the
            wrapped Counter.
        '''
        self._mutator = mutator
        self._counter = collections.Counter()
        if values is not None:
            self._counter.update(self._mutated_counts(values))

    def _mutated_counts(self, values):
        '''
        Return a Counter built from values with every key mutated.
        '''
        if isinstance(values, collections.abc.Mapping):
            counts = collections.Counter()
            # Accumulate so that keys which collide after mutation add up
            # instead of overwriting each other.
            for key, count in values.items():
                counts[self._mutator(key)] += count
            return counts
        return collections.Counter(self._mutator(v) for v in values)

    def __getitem__(self, item):
        # Like Counter, a missing key yields 0 rather than raising KeyError.
        return self._counter[self._mutator(item)]

    def __setitem__(self, item, value):
        self._counter[self._mutator(item)] = value

    def __delitem__(self, item):
        del self._counter[self._mutator(item)]

    def __contains__(self, item):
        return self._mutator(item) in self._counter

    def __iter__(self):
        # Iterates over the stored (already mutated) keys.
        return iter(self._counter)

    def __len__(self):
        return len(self._counter)

    def __repr__(self):
        return f'{self.__class__.__name__}({dict(self._counter)!r})'

    def elements(self):
        '''
        Return the wrapped Counter's elements() iterator.
        '''
        return self._counter.elements()

    def most_common(self, n=None):
        '''
        Return the n most common (key, count) pairs; all of them if n is
        None.
        '''
        return self._counter.most_common(n)

    def subtract(self, values):
        '''
        Subtract counts given as an iterable of items or as a mapping of
        item -> count. Keys are mutated first. Returns None.
        '''
        self._counter.subtract(self._mutated_counts(values))
def main():
    '''
    Exercise CustomCounter with a sample mutator and assert the results.
    '''
    def mutator(s):
        # Asterisks are easier to print than '\ue000'.
        return '*' + s + '*'

    words = 'the lazy fox jumps over the brown dog'.split()

    # Test None (allowed by collections.Counter).
    ctr_none = CustomCounter(None)
    assert 0 == len(ctr_none)

    # Test typical dict and collections.Counter methods.
    ctr = CustomCounter(words, mutator=mutator)
    print(ctr)
    assert 1 == ctr['dog']
    assert 2 == ctr['the']
    assert 7 == len(ctr)

    del ctr['lazy']
    assert 6 == len(ctr)

    # Subtracting existing items lowers their counts but keeps the keys.
    ctr.subtract(['jumps', 'dog'])
    assert 0 == ctr['dog']
    assert 6 == len(ctr)

    # Subtracting a mapping may push counts negative and add new keys.
    ctr.subtract({'the': 5, 'bogus': 100})
    assert -3 == ctr['the']
    assert -100 == ctr['bogus']
    assert 7 == len(ctr)
# Run the demonstration only when executed as a script.
if __name__ == "__main__":
    main()
输出(换行,便于阅读):
CustomCounter({
'*brown*': 1,
'*lazy*': 1,
'*the*': 2,
'*over*': 1,
'*jumps*': 1,
'*fox*': 1,
'*dog*': 1
})
我为初始化程序添加了一个仅限关键字的参数 mutator,用来存储一个函数,该函数把现实世界中的任何内容转换为用于计数的 "mutant" 版本。
请注意,这可能意味着 CustomCounter 存储的不再是 "hashable objects",而是 "hashable objects that don't make the mutator barf"。
此外,如果标准库的 Counter 新增了方法,您也必须更新 CustomCounter 来 "override" 它们。
(你也许可以用 __getattr__ 把任何未知属性转发给 self._counter 来解决这个问题,但参数中的任何键都会以原始的 "un-mutated" 形式传给 Counter。)
最后,正如我之前指出的,它*实际上*不是 collections.Counter 的子类;如果其他代码专门要找这样的子类,就需要注意这一点。
一个 set() 函数(即 __setitem__)不同的 Counter 对象:
from collections import Counter
class CustomCounter(Counter):
    '''
    Counter whose multi-character keys are stored with a trailing U+E000.
    '''

    def __setitem__(self, key, value):
        '''
        Store value under key, appending U+E000 to any key longer than one
        character that does not already end with it.
        '''
        # Keys of length 0 or 1 are stored unchanged.
        if len(key) > 1 and not key.endswith(u"\uE000"):
            key += u"\uE000"
        super(CustomCounter, self).__setitem__(key, value)
为了允许把用户定义的 char/str 附加到键(key)上,我尝试了:
from collections import Counter, defaultdict
class AppendedStrCounter(Counter):
    '''
    Counter that appends a caller-supplied suffix to keys and stores every
    key as a tuple.
    '''

    def __init__(self, str_to_append):
        '''
        Create an empty counter.

        str_to_append: suffix that __setitem__ appends to keys.
        '''
        self._appended_str = str_to_append
        # No iterable is forwarded here, so the counter always starts empty.
        super(AppendedStrCounter, self).__init__()

    def __setitem__(self, key, value):
        '''
        Store value under tuple(key), first appending the suffix to any key
        longer than one element that does not already end with it.
        '''
        if len(key) > 1 and not key.endswith(self._appended_str):
            key += self._appended_str
        super(AppendedStrCounter, self).__setitem__(tuple(key), value)
但是它返回一个空的计数器:
>>> class AppendedStrCounter(Counter):
...     def __init__(self, str_to_append):
...         self._appended_str = str_to_append
...         super(AppendedStrCounter, self).__init__()
...     def __setitem__(self, key, value):
...         if len(key) > 1 and not key.endswith(self._appended_str):
...             key += self._appended_str
...         super(AppendedStrCounter, self).__setitem__(tuple(key), value)
...
>>> AppendedStrCounter('foo bar bar blah'.split())
AppendedStrCounter()
那是因为我在 __init__() 中缺少 iter 参数:
from collections import Counter, defaultdict
class AppendedStrCounter(Counter):
    '''
    Counter that is filled from an iterable, appends a caller-supplied
    suffix to keys and stores every key as a tuple.

    NOTE: stores use the modified tuple key while lookups use the key as
    given, so items repeated in the iterable passed to __init__ each end up
    with a count of 1 instead of being accumulated.
    '''

    def __init__(self, iter, str_to_append):
        '''
        iter: iterable of items to count (forwarded to Counter.__init__).
        str_to_append: suffix that __setitem__ appends to keys.
        '''
        # The suffix is set first because filling the counter goes through
        # __setitem__, which reads it.
        self._appended_str = str_to_append
        super(AppendedStrCounter, self).__init__(iter)

    def __setitem__(self, key, value):
        '''
        Store value under tuple(key), first appending the suffix to any key
        longer than one element that does not already end with it.
        '''
        if len(key) > 1 and not key.endswith(self._appended_str):
            key += self._appended_str
        super(AppendedStrCounter, self).__setitem__(tuple(key), value)
[输出]:
>>> AppendedStrCounter('foo bar bar blah'.split(), u'\ue000')
AppendedStrCounter({('f', 'o', 'o', '\ue000'): 1, ('b', 'a', 'r', '\ue000'): 1, ('b', 'l', 'a', 'h', '\ue000'): 1})
但是 'bar' 的值是错误的,应该是 2 而不是 1。
在 __init__() 中使用 iter 是初始化 Counter 的正确方法吗?
正如在别处所指出的,collections.Counter 的文档并没有说明它的 __init__ 方法是*如何*添加键或设置值的,只说明它会这样做。
由于它没有被明确设计为可子类化,因此最明智的做法是不要将其子类化。
collections.abc 模块的存在,就是为了给 Python 内置类型提供易于子类化的抽象类,其中包括 dict(用 ABC 的术语来说就是 MutableMapping)。
所以,如果你只需要 "a Counter-like class"(而不是“一个能满足 isinstance 和 issubclass 等内置函数的 Counter 子类”),
您可以创建自己的 MutableMapping,让它内部持有一个 Counter,然后由它来 "middleman"(中转)初始化器以及 Counter 在典型 dict 之外添加的三个方法:
import collections
import collections.abc
def _identity(s):
    '''
    Default mutator function: return s unchanged.
    '''
    return s


class CustomCounter(collections.abc.MutableMapping):
    '''
    Counter-like mapping that passes every key through a mutator function.

    Overrides the 5 methods of a MutableMapping:
    __getitem__, __setitem__, __delitem__, __iter__, __len__
    ...and the 3 non-Mapping methods of Counter:
    elements, most_common, subtract

    __contains__ is overridden as well: the inherited version relies on
    __getitem__ raising KeyError, which never happens here because the
    wrapped Counter returns 0 for missing keys.
    '''

    def __init__(self, values=None, *, mutator=_identity):
        '''
        values: None, an iterable of items to count, or a mapping of
            item -> count.
        mutator: function applied to every key before it reaches the
            wrapped Counter.
        '''
        self._mutator = mutator
        self._counter = collections.Counter()
        if values is not None:
            self._counter.update(self._mutated_counts(values))

    def _mutated_counts(self, values):
        '''
        Return a Counter built from values with every key mutated.
        '''
        if isinstance(values, collections.abc.Mapping):
            counts = collections.Counter()
            # Accumulate so that keys which collide after mutation add up
            # instead of overwriting each other.
            for key, count in values.items():
                counts[self._mutator(key)] += count
            return counts
        return collections.Counter(self._mutator(v) for v in values)

    def __getitem__(self, item):
        # Like Counter, a missing key yields 0 rather than raising KeyError.
        return self._counter[self._mutator(item)]

    def __setitem__(self, item, value):
        self._counter[self._mutator(item)] = value

    def __delitem__(self, item):
        del self._counter[self._mutator(item)]

    def __contains__(self, item):
        return self._mutator(item) in self._counter

    def __iter__(self):
        # Iterates over the stored (already mutated) keys.
        return iter(self._counter)

    def __len__(self):
        return len(self._counter)

    def __repr__(self):
        return f'{self.__class__.__name__}({dict(self._counter)!r})'

    def elements(self):
        '''
        Return the wrapped Counter's elements() iterator.
        '''
        return self._counter.elements()

    def most_common(self, n=None):
        '''
        Return the n most common (key, count) pairs; all of them if n is
        None.
        '''
        return self._counter.most_common(n)

    def subtract(self, values):
        '''
        Subtract counts given as an iterable of items or as a mapping of
        item -> count. Keys are mutated first. Returns None.
        '''
        self._counter.subtract(self._mutated_counts(values))
def main():
    '''
    Exercise CustomCounter with a sample mutator and assert the results.
    '''
    def mutator(s):
        # Asterisks are easier to print than '\ue000'.
        return '*' + s + '*'

    words = 'the lazy fox jumps over the brown dog'.split()

    # Test None (allowed by collections.Counter).
    ctr_none = CustomCounter(None)
    assert 0 == len(ctr_none)

    # Test typical dict and collections.Counter methods.
    ctr = CustomCounter(words, mutator=mutator)
    print(ctr)
    assert 1 == ctr['dog']
    assert 2 == ctr['the']
    assert 7 == len(ctr)

    del ctr['lazy']
    assert 6 == len(ctr)

    # Subtracting existing items lowers their counts but keeps the keys.
    ctr.subtract(['jumps', 'dog'])
    assert 0 == ctr['dog']
    assert 6 == len(ctr)

    # Subtracting a mapping may push counts negative and add new keys.
    ctr.subtract({'the': 5, 'bogus': 100})
    assert -3 == ctr['the']
    assert -100 == ctr['bogus']
    assert 7 == len(ctr)
# Run the demonstration only when executed as a script.
if __name__ == "__main__":
    main()
输出(换行,便于阅读):
CustomCounter({
'*brown*': 1,
'*lazy*': 1,
'*the*': 2,
'*over*': 1,
'*jumps*': 1,
'*fox*': 1,
'*dog*': 1
})
我为初始化程序添加了一个仅限关键字的参数 mutator,用来存储一个函数,该函数把现实世界中的任何内容转换为用于计数的 "mutant" 版本。
请注意,这可能意味着 CustomCounter 存储的不再是 "hashable objects",而是 "hashable objects that don't make the mutator barf"。
此外,如果标准库的 Counter 新增了方法,您也必须更新 CustomCounter 来 "override" 它们。
(你也许可以用 __getattr__ 把任何未知属性转发给 self._counter 来解决这个问题,但参数中的任何键都会以原始的 "un-mutated" 形式传给 Counter。)
最后,正如我之前指出的,它*实际上*不是 collections.Counter 的子类;如果其他代码专门要找这样的子类,就需要注意这一点。