collections.Counter:对“计数器的计数器”求和时加法报错
collections.Counter addition error with counter of counters summation
假设我们有如下代码片段:
from collections import namedtuple, Counter
# One experiment: the day label and the Counter of observations for that day.
Experiment = namedtuple('Experiment', 'day distribution')
# One lab: its name and the list of Experiment records it reported.
LabResults = namedtuple('LabResults', 'lab_name experiments_data')

# Every lab reports both days (some with an empty distribution).
lab_results_list_good = [
    LabResults(
        lab_name='Lab A',
        experiments_data=[
            Experiment(day='first_day', distribution=Counter('abc')),
            Experiment(day='second_day', distribution=Counter()),
        ],
    ),
    LabResults(
        lab_name='Lab B',
        experiments_data=[
            Experiment(day='first_day', distribution=Counter()),
            Experiment(day='second_day', distribution=Counter('a')),
        ],
    ),
]

# 'Lab B' has no entry at all for 'second_day'.
lab_results_list_bad = [
    LabResults(
        lab_name='Lab A',
        experiments_data=[
            Experiment(day='first_day', distribution=Counter('abc')),
            Experiment(day='second_day', distribution=Counter('def')),
        ],
    ),
    LabResults(
        lab_name='Lab B',
        experiments_data=[
            Experiment(day='first_day', distribution=Counter('ghi')),
        ],
    ),
]
def merge_distributions(lab_results_list):
    """Sum every lab's day -> distribution mapping into one Counter.

    Each lab is turned into a Counter keyed by day whose values are the
    per-day distribution Counters, and those are accumulated with ``+=``.

    NOTE: the outer addition adds the nested Counter values directly, so it
    raises TypeError ('Counter' + 'int') when a day is present on only one
    side of the addition.
    """
    merged = Counter()
    for lab in lab_results_list:
        per_day = dict((exp.day, exp.distribution) for exp in lab.experiments_data)
        merged += Counter(per_day)
    return merged
第一种情况可以正常工作,第二种情况则会报错:
In [2]: print merge_distributions(lab_results_list_good)
Counter({'first_day': Counter({'a': 1, 'c': 1, 'b': 1}), 'second_day': Counter({'a': 1})})
In [3]: print merge_distributions(lab_results_list_bad)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-969bd085f2e5> in <module>()
----> 1 print merge_distributions(lab_results_list_bad)
<ipython-input-1-0bdc39e8c846> in merge_distributions(lab_results_list)
42
43 curr_day_to_dist = Counter({exp.day: exp.distribution for exp in lab_result.experiments_data})
---> 44 day_to_distribution_report += curr_day_to_dist
45 return day_to_distribution_report
/usr/local/Cellar/python/2.7.13/Frameworks/Python.framework/Versions/2.7/lib/python2.7/collections.pyc in __add__(self, other)
642 result = Counter()
643 for elem, count in self.items():
--> 644 newcount = count + other[elem]
645 if newcount > 0:
646 result[elem] = newcount
TypeError: unsupported operand type(s) for +: 'Counter' and 'int'
在 Python 2 中只有第二种情况会失败,而在 Python 3 中两种情况都会失败。
所以我的问题是:既然在 Python 3 中两种情况都运行失败,合并“计数器的计数器”是不是一个非常糟糕的主意?这看起来像是一个 bug,因为 Counter 对象本身是支持加法的。
编辑:我为 merge_distributions 函数找到了一个快速修复:
def merge_distributions(lab_results_list):
    """Merge per-day distributions across all labs into one report.

    Args:
        lab_results_list: iterable of records exposing ``experiments_data``,
            an iterable of records with ``day`` and ``distribution`` (a
            Counter) attributes.

    Returns:
        Counter mapping every day seen in any lab to the sum of that day's
        distribution Counters.
    """
    day_to_distribution_report = Counter()
    for lab_result in lab_results_list:
        # A day repeated within one lab keeps only its last entry (dict semantics).
        curr_day_to_dist = {exp.day: exp.distribution for exp in lab_result.experiments_data}
        for day, distribution in curr_day_to_dist.items():
            # Use .get() with an empty Counter: indexing a missing key yields
            # the int 0, and Counter + int raises TypeError.
            merged_so_far = day_to_distribution_report.get(day, Counter())
            day_to_distribution_report[day] = merged_so_far + distribution
    return day_to_distribution_report
为什么 Python 源码默认不做类似的处理?
如果计数器中不存在某个键,访问该键时默认返回 0,
而不是 Counter():
>>> from collections import Counter
>>> c = Counter({
... 'a': Counter(), 'b': Counter(),
... })
>>> d = Counter({
... 'a': Counter()
... })
>>> c['b']
Counter()
>>> d['b']
0
>>> c['b'] + d['b']
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unsupported operand type(s) for +: 'Counter' and 'int'
>>>
下面是一个 DefaultCounter 的
实现:
from collections import Counter
class DefaultCounter(Counter):
    """Counter whose ``+`` treats a numeric zero as an empty Counter.

    Indexing a Counter with a missing key returns the int ``0``, so adding two
    counters whose values are themselves counters fails as soon as one side
    lacks a key (``Counter + int``). When the nested values are
    DefaultCounter instances, that ``0`` is replaced by an empty Counter
    before adding.
    """

    @staticmethod
    def _keeps(count):
        """Return True when *count* belongs in a sum.

        Nested counters are always kept; numeric counts only when positive.
        A Counter cannot be ordered against ``0`` on Python 3, hence the
        explicit type check instead of a bare ``count > 0``.
        """
        return isinstance(count, Counter) or count > 0

    def __add__(self, other):
        """Return the element-wise sum of *self* and *other* as a Counter.

        *other* may be a Counter or anything equal to ``0`` (treated as an
        empty Counter); any other operand yields ``NotImplemented``.
        """
        if not isinstance(other, Counter):
            if other == 0:
                other = Counter()
            else:
                return NotImplemented
        result = Counter()
        for elem, count in self.items():
            # other[elem] is 0 when the key is missing; a DefaultCounter
            # value on the left absorbs that 0 through this same method.
            newcount = count + other[elem]
            if self._keeps(newcount):
                result[elem] = newcount
        for elem, count in other.items():
            if elem not in self and self._keeps(count):
                result[elem] = count
        return result

    def __radd__(self, other):
        """Support ``0 + counter``, e.g. the implicit start value of ``sum()``."""
        if isinstance(other, Counter):
            # Defer to the left-hand Counter's own __add__.
            return NotImplemented
        return self.__add__(other)
>>> from m import DefaultCounter
>>> a = DefaultCounter({'a': DefaultCounter(), 'b': DefaultCounter()})
>>> b = DefaultCounter({'a': DefaultCounter()})
>>>
>>> a + b
Counter({'a': Counter(), 'b': Counter()})
# no longer raises. ^_^
假设我们有如下代码片段:
from collections import namedtuple, Counter
# One experiment: the day label and the Counter of observations for that day.
Experiment = namedtuple('Experiment', 'day distribution')
# One lab: its name and the list of Experiment records it reported.
LabResults = namedtuple('LabResults', 'lab_name experiments_data')

# Every lab reports both days (some with an empty distribution).
lab_results_list_good = [
    LabResults(
        lab_name='Lab A',
        experiments_data=[
            Experiment(day='first_day', distribution=Counter('abc')),
            Experiment(day='second_day', distribution=Counter()),
        ],
    ),
    LabResults(
        lab_name='Lab B',
        experiments_data=[
            Experiment(day='first_day', distribution=Counter()),
            Experiment(day='second_day', distribution=Counter('a')),
        ],
    ),
]

# 'Lab B' has no entry at all for 'second_day'.
lab_results_list_bad = [
    LabResults(
        lab_name='Lab A',
        experiments_data=[
            Experiment(day='first_day', distribution=Counter('abc')),
            Experiment(day='second_day', distribution=Counter('def')),
        ],
    ),
    LabResults(
        lab_name='Lab B',
        experiments_data=[
            Experiment(day='first_day', distribution=Counter('ghi')),
        ],
    ),
]
def merge_distributions(lab_results_list):
    """Sum every lab's day -> distribution mapping into one Counter.

    Each lab is turned into a Counter keyed by day whose values are the
    per-day distribution Counters, and those are accumulated with ``+=``.

    NOTE: the outer addition adds the nested Counter values directly, so it
    raises TypeError ('Counter' + 'int') when a day is present on only one
    side of the addition.
    """
    merged = Counter()
    for lab in lab_results_list:
        per_day = dict((exp.day, exp.distribution) for exp in lab.experiments_data)
        merged += Counter(per_day)
    return merged
第一种情况可以正常工作,第二种情况则会报错:
In [2]: print merge_distributions(lab_results_list_good)
Counter({'first_day': Counter({'a': 1, 'c': 1, 'b': 1}), 'second_day': Counter({'a': 1})})
In [3]: print merge_distributions(lab_results_list_bad)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-969bd085f2e5> in <module>()
----> 1 print merge_distributions(lab_results_list_bad)
<ipython-input-1-0bdc39e8c846> in merge_distributions(lab_results_list)
42
43 curr_day_to_dist = Counter({exp.day: exp.distribution for exp in lab_result.experiments_data})
---> 44 day_to_distribution_report += curr_day_to_dist
45 return day_to_distribution_report
/usr/local/Cellar/python/2.7.13/Frameworks/Python.framework/Versions/2.7/lib/python2.7/collections.pyc in __add__(self, other)
642 result = Counter()
643 for elem, count in self.items():
--> 644 newcount = count + other[elem]
645 if newcount > 0:
646 result[elem] = newcount
TypeError: unsupported operand type(s) for +: 'Counter' and 'int'
在 Python 2 中只有第二种情况会失败,而在 Python 3 中两种情况都会失败。
所以我的问题是:既然在 Python 3 中两种情况都运行失败,合并“计数器的计数器”是不是一个非常糟糕的主意?这看起来像是一个 bug,因为 Counter 对象本身是支持加法的。
编辑:我为 merge_distributions 函数找到了一个快速修复:
def merge_distributions(lab_results_list):
    """Merge per-day distributions across all labs into one report.

    Args:
        lab_results_list: iterable of records exposing ``experiments_data``,
            an iterable of records with ``day`` and ``distribution`` (a
            Counter) attributes.

    Returns:
        Counter mapping every day seen in any lab to the sum of that day's
        distribution Counters.
    """
    day_to_distribution_report = Counter()
    for lab_result in lab_results_list:
        # A day repeated within one lab keeps only its last entry (dict semantics).
        curr_day_to_dist = {exp.day: exp.distribution for exp in lab_result.experiments_data}
        for day, distribution in curr_day_to_dist.items():
            # Use .get() with an empty Counter: indexing a missing key yields
            # the int 0, and Counter + int raises TypeError.
            merged_so_far = day_to_distribution_report.get(day, Counter())
            day_to_distribution_report[day] = merged_so_far + distribution
    return day_to_distribution_report
为什么 Python 源码默认不做类似的处理?
如果计数器中不存在某个键,访问该键时默认返回 0,
而不是 Counter():
>>> from collections import Counter
>>> c = Counter({
... 'a': Counter(), 'b': Counter(),
... })
>>> d = Counter({
... 'a': Counter()
... })
>>> c['b']
Counter()
>>> d['b']
0
>>> c['b'] + d['b']
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unsupported operand type(s) for +: 'Counter' and 'int'
>>>
下面是一个 DefaultCounter 的
实现:
from collections import Counter
class DefaultCounter(Counter):
    """Counter whose ``+`` treats a numeric zero as an empty Counter.

    Indexing a Counter with a missing key returns the int ``0``, so adding two
    counters whose values are themselves counters fails as soon as one side
    lacks a key (``Counter + int``). When the nested values are
    DefaultCounter instances, that ``0`` is replaced by an empty Counter
    before adding.
    """

    @staticmethod
    def _keeps(count):
        """Return True when *count* belongs in a sum.

        Nested counters are always kept; numeric counts only when positive.
        A Counter cannot be ordered against ``0`` on Python 3, hence the
        explicit type check instead of a bare ``count > 0``.
        """
        return isinstance(count, Counter) or count > 0

    def __add__(self, other):
        """Return the element-wise sum of *self* and *other* as a Counter.

        *other* may be a Counter or anything equal to ``0`` (treated as an
        empty Counter); any other operand yields ``NotImplemented``.
        """
        if not isinstance(other, Counter):
            if other == 0:
                other = Counter()
            else:
                return NotImplemented
        result = Counter()
        for elem, count in self.items():
            # other[elem] is 0 when the key is missing; a DefaultCounter
            # value on the left absorbs that 0 through this same method.
            newcount = count + other[elem]
            if self._keeps(newcount):
                result[elem] = newcount
        for elem, count in other.items():
            if elem not in self and self._keeps(count):
                result[elem] = count
        return result

    def __radd__(self, other):
        """Support ``0 + counter``, e.g. the implicit start value of ``sum()``."""
        if isinstance(other, Counter):
            # Defer to the left-hand Counter's own __add__.
            return NotImplemented
        return self.__add__(other)
>>> from m import DefaultCounter
>>> a = DefaultCounter({'a': DefaultCounter(), 'b': DefaultCounter()})
>>> b = DefaultCounter({'a': DefaultCounter()})
>>>
>>> a + b
Counter({'a': Counter(), 'b': Counter()})
# no longer raises. ^_^