在 Python 中排除 collections.Counter 中的零
Exclude zeros in collections.Counter in Python
有没有办法让 collections.Counter 不统计(忽略)某个给定值(这里是 0)?
from collections import Counter
import numpy as np
# 100 rows of 100 random integers drawn from {0, 1, 2, 3}.
idx = np.random.randint(4, size=(100, 100))
# Per-row results: the most frequent value and how many times it occurs.
most_common = np.zeros(100)
num_most_common = np.zeros(100)
for i in range(100):
    # most_common(1) returns [(value, count)]; zeros are counted like any
    # other value here, which is exactly what the question wants to avoid.
    most_common[i], num_most_common[i] = Counter(idx[i, :]).most_common(1)[0]
也就是说,如果 0 是最常见的值,结果应该给出第二常见的值。另外,有没有办法避免这里的 for 循环?
您可以执行以下操作,使用生成器仅对不为 0 的内容进行计数。
# filter(None, row) drops the falsy entries (the zeros) before counting,
# so the top entry is always the most frequent non-zero value of the row.
most_common = np.array([Counter(filter(None, row)).most_common(1)[0][0] for row in idx])
# Same count again, this time keeping the occurrence count of that value.
num_most_common = np.array([Counter(filter(None, row)).most_common(1)[0][1] for row in idx])
甚至
# One Counter per row: each entry is the (value, count) pair of the most
# frequent non-zero value, stacked into an array with two columns.
count = np.array([Counter(filter(None, row)).most_common(1)[0] for row in idx])
# Column 0 holds the values, column 1 their occurrence counts.
most_common, num_most_common = count[:, 0], count[:, 1]
对于非负整数,我们可以使用向量化的 bincount:
# @Divakar
def bincount2D_vectorized(a):
    """Count, for every row of a 2D array, how often each integer occurs.

    Args:
        a: 2D NumPy array of non-negative integers.

    Returns:
        Array of shape ``(a.shape[0], a.max() + 1)`` whose entry ``[i, v]``
        is the number of times the value ``v`` appears in row ``i``.

    Raises:
        ValueError: If ``a`` contains a negative value.
    """
    # A negative value in row i >= 1 would be shifted into the bin range of
    # row i - 1 and be counted there without np.bincount noticing, so
    # reject negatives up front instead of returning wrong counts.
    if a.min() < 0:
        raise ValueError("bincount2D_vectorized requires non-negative integers")
    N = a.max() + 1
    # Give every row its own disjoint range of N bins so that a single flat
    # bincount call can count all rows at once.
    row_offsets = np.arange(a.shape[0])[:, None] * N
    a_offs = a + row_offsets
    return np.bincount(a_offs.ravel(), minlength=a.shape[0] * N).reshape(-1, N)
# Per-row histogram: column j holds how many times value j occurs in the row.
c = bincount2D_vectorized(idx)
# Column 0 holds the counts of the zeros, so leave it out of the search.
nonzero_counts = c[:, 1:]
# argmax is relative to the sliced columns, hence the +1 to get the value back.
most_common = nonzero_counts.argmax(axis=1) + 1
num_most_common = nonzero_counts.max(axis=1)
# faster : num_most_common = c[np.arange(len(most_common)),most_common]
对于一般的整数(包括负数),可以这样扩展:
# Shift all values so the smallest one becomes 0 and can be used as a bin index.
s = idx.min()
c = bincount2D_vectorized(idx - s)
# After the shift the value 0 lives in column -s. That column only exists when
# 0 lies within [idx.min(), idx.max()]; otherwise -s is either a negative index
# (which would wipe the counts of a real value) or out of bounds.
if s <= 0 <= idx.max():
    c[:, -s] = 0
# Column index of the most frequent remaining value in each row.
most_common = c.argmax(1)
num_most_common = c[np.arange(len(most_common)), most_common]
# Undo the shift to turn column indices back into the original values.
most_common += s
有没有办法让 collections.Counter 不统计(忽略)某个给定值(这里是 0)?
from collections import Counter
import numpy as np
# 100 rows of 100 random integers drawn from {0, 1, 2, 3}.
idx = np.random.randint(4, size=(100, 100))
# Per-row results: the most frequent value and how many times it occurs.
most_common = np.zeros(100)
num_most_common = np.zeros(100)
for i in range(100):
    # most_common(1) returns [(value, count)]; zeros are counted like any
    # other value here, which is exactly what the question wants to avoid.
    most_common[i], num_most_common[i] = Counter(idx[i, :]).most_common(1)[0]
也就是说,如果 0 是最常见的值,结果应该给出第二常见的值。另外,有没有办法避免这里的 for 循环?
您可以执行以下操作,使用生成器仅对不为 0 的内容进行计数。
# filter(None, row) drops the falsy entries (the zeros) before counting,
# so the top entry is always the most frequent non-zero value of the row.
most_common = np.array([Counter(filter(None, row)).most_common(1)[0][0] for row in idx])
# Same count again, this time keeping the occurrence count of that value.
num_most_common = np.array([Counter(filter(None, row)).most_common(1)[0][1] for row in idx])
甚至
# One Counter per row: each entry is the (value, count) pair of the most
# frequent non-zero value, stacked into an array with two columns.
count = np.array([Counter(filter(None, row)).most_common(1)[0] for row in idx])
# Column 0 holds the values, column 1 their occurrence counts.
most_common, num_most_common = count[:, 0], count[:, 1]
对于非负整数,我们可以使用向量化的 bincount:
# @Divakar
def bincount2D_vectorized(a):
    """Count, for every row of a 2D array, how often each integer occurs.

    Args:
        a: 2D NumPy array of non-negative integers.

    Returns:
        Array of shape ``(a.shape[0], a.max() + 1)`` whose entry ``[i, v]``
        is the number of times the value ``v`` appears in row ``i``.

    Raises:
        ValueError: If ``a`` contains a negative value.
    """
    # A negative value in row i >= 1 would be shifted into the bin range of
    # row i - 1 and be counted there without np.bincount noticing, so
    # reject negatives up front instead of returning wrong counts.
    if a.min() < 0:
        raise ValueError("bincount2D_vectorized requires non-negative integers")
    N = a.max() + 1
    # Give every row its own disjoint range of N bins so that a single flat
    # bincount call can count all rows at once.
    row_offsets = np.arange(a.shape[0])[:, None] * N
    a_offs = a + row_offsets
    return np.bincount(a_offs.ravel(), minlength=a.shape[0] * N).reshape(-1, N)
# Per-row histogram: column j holds how many times value j occurs in the row.
c = bincount2D_vectorized(idx)
# Column 0 holds the counts of the zeros, so leave it out of the search.
nonzero_counts = c[:, 1:]
# argmax is relative to the sliced columns, hence the +1 to get the value back.
most_common = nonzero_counts.argmax(axis=1) + 1
num_most_common = nonzero_counts.max(axis=1)
# faster : num_most_common = c[np.arange(len(most_common)),most_common]
对于一般的整数(包括负数),可以这样扩展:
# Shift all values so the smallest one becomes 0 and can be used as a bin index.
s = idx.min()
c = bincount2D_vectorized(idx - s)
# After the shift the value 0 lives in column -s. That column only exists when
# 0 lies within [idx.min(), idx.max()]; otherwise -s is either a negative index
# (which would wipe the counts of a real value) or out of bounds.
if s <= 0 <= idx.max():
    c[:, -s] = 0
# Column index of the most frequent remaining value in each row.
most_common = c.argmax(1)
num_most_common = c[np.arange(len(most_common)), most_common]
# Undo the shift to turn column indices back into the original values.
most_common += s