具有给定索引的 numpy 元素的总和
A sum of numpy elements with a given index
这段代码实现了没有截距的单变量回归。它有效,但我想不出不使用慢 python 迭代的方法来做到这一点。有什么想法吗?
# y: numpy array of n values, for large n
# coeff: numpy array of n values, for large n
# L : size of result
# l_indices : numpy array of indices from 0 to L-1
def simple_regression(y, coeff, L, l_index):
numerator = y*coeff
denominator = np.square(coeff)
numsum = np.zeros(L)
denomsum = np.zeros(L)
for (n,d,l) in zip(numerator,denominator,l_index):
numsum[l] += n
denomsum[l] += d
return numsum / denomsum
基本上像下面这样的操作,不会进行大量内存分配:
numsum[l] = np.sum(numerator[l_index == l])
(这样做比我的第一个代码要低得多)
如果您知道您的索引 l_index
只有唯一值,您可以这样做:
numsum[l_index] += numerator
denomsum[l_index] += denominator
如果您的索引不知道是唯一的,您可以使用 numpy.add.at
:
做同样的事情
numpy.add.at(numsum, l_index, numerator)
numpy.add.at(denomsum, l_index, denominator)
您可以使用 numpy.bincount
:
import numpy as np
def simple_regression(y, coeff, L, l_index):
numerator = y*coeff
denominator = np.square(coeff)
numsum = np.zeros(L)
denomsum = np.zeros(L)
for (n,d,l) in zip(numerator,denominator,l_index):
numsum[l] += n
denomsum[l] += d
return numsum / denomsum
def simple_regression_pp(y, coeff, L, l_index):
numerator = y*coeff
denominator = np.square(coeff)
numsum = np.bincount(l_index, numerator, L)
denomsum = np.bincount(l_index, denominator, L)
return numsum / denomsum
def simple_regression_br(y, coeff, L, l_index):
numerator = y*coeff
denominator = np.square(coeff)
numsum = np.zeros(L)
denomsum = np.zeros(L)
np.add.at(numsum, l_index, numerator)
np.add.at(denomsum, l_index, denominator)
return numsum / denomsum
L, N = 1_000, 1_000_000
y, coeff = np.random.random((2, N))
l_index = np.random.randint(0, L, (N,))
from timeit import timeit
print('OP', timeit("simple_regression(y, coeff, L, l_index)", globals=globals(),
number=10), 'sec')
print('pp', timeit("simple_regression_pp(y, coeff, L, l_index)",
globals=globals(), number=10), 'sec')
print('br', timeit("simple_regression_br(y, coeff, L, l_index)",
globals=globals(), number=10), 'sec')
样本运行:
OP 6.602819449035451 sec
pp 0.12009818502701819 sec
br 1.5504542298149318 sec
这段代码实现了没有截距的单变量回归。它有效,但我想不出不使用慢 python 迭代的方法来做到这一点。有什么想法吗?
# y: numpy array of n values, for large n
# coeff: numpy array of n values, for large n
# L : size of result
# l_indices : numpy array of indices from 0 to L-1
def simple_regression(y, coeff, L, l_index):
numerator = y*coeff
denominator = np.square(coeff)
numsum = np.zeros(L)
denomsum = np.zeros(L)
for (n,d,l) in zip(numerator,denominator,l_index):
numsum[l] += n
denomsum[l] += d
return numsum / denomsum
基本上像下面这样的操作,不会进行大量内存分配:
numsum[l] = np.sum(numerator[l_index == l])
(这样做比我的第一个代码要低得多)
如果您知道您的索引 l_index
只有唯一值,您可以这样做:
numsum[l_index] += numerator
denomsum[l_index] += denominator
如果您的索引不知道是唯一的,您可以使用 numpy.add.at
:
numpy.add.at(numsum, l_index, numerator)
numpy.add.at(denomsum, l_index, denominator)
您可以使用 numpy.bincount
:
import numpy as np
def simple_regression(y, coeff, L, l_index):
numerator = y*coeff
denominator = np.square(coeff)
numsum = np.zeros(L)
denomsum = np.zeros(L)
for (n,d,l) in zip(numerator,denominator,l_index):
numsum[l] += n
denomsum[l] += d
return numsum / denomsum
def simple_regression_pp(y, coeff, L, l_index):
numerator = y*coeff
denominator = np.square(coeff)
numsum = np.bincount(l_index, numerator, L)
denomsum = np.bincount(l_index, denominator, L)
return numsum / denomsum
def simple_regression_br(y, coeff, L, l_index):
numerator = y*coeff
denominator = np.square(coeff)
numsum = np.zeros(L)
denomsum = np.zeros(L)
np.add.at(numsum, l_index, numerator)
np.add.at(denomsum, l_index, denominator)
return numsum / denomsum
L, N = 1_000, 1_000_000
y, coeff = np.random.random((2, N))
l_index = np.random.randint(0, L, (N,))
from timeit import timeit
print('OP', timeit("simple_regression(y, coeff, L, l_index)", globals=globals(),
number=10), 'sec')
print('pp', timeit("simple_regression_pp(y, coeff, L, l_index)",
globals=globals(), number=10), 'sec')
print('br', timeit("simple_regression_br(y, coeff, L, l_index)",
globals=globals(), number=10), 'sec')
样本运行:
OP 6.602819449035451 sec
pp 0.12009818502701819 sec
br 1.5504542298149318 sec