如何创建交互稀疏矩阵?
How do i create interacting sparse matrix?
假设我有两个稀疏矩阵:
from scipy.sparse import random
from scipy import stats
# Small demo operands: two 5000 x 100 random sparse matrices with density 0.01.
S0 = random(5000,100, density=0.01)
S1 = random(5000,100,density=0.01)
我想创建一个稀疏矩阵 S2,其形状为 (5000, 100*100)。(在我的实际应用中,这里的 5000 其实是 2000 万。)S2 的每一行,都是 S0 与 S1 对应行的两个 100 维向量之间的某种交互。
S2 = some_kind_of_tensor_multiplication(S0 ,S1 )
具体来说,S2[i, j] = S0[i, k0] * S1[i, k1],其中 j = k0*100 + k1;让 k0、k1 遍历 [0, 99] 中的所有取值,就得到长度为 10000 的第 i 行。我找不到任何高效的方法来实现这一目标。有人能帮忙吗?
一种朴素的做法大致如下,但我认为它会非常低效:
# Straightforward but slow reference implementation: one element-wise column
# product per (k0, k1) pair, i.e. S0.shape[1] * S1.shape[1] sparse products
# driven from Python.
from scipy import sparse  # local import: ``hstack`` is needed below

# ``scipy.sparse.random`` returns COO matrices by default, and COO does not
# support slicing; CSC makes single-column extraction cheap.
S0_csc = S0.tocsc()
S1_csc = S1.tocsc()

result = []
for i in range(S0_csc.shape[1]):
    for j in range(S1_csc.shape[1]):
        # ``*`` between sparse matrices is the matrix product (a shape error
        # for two column vectors); ``multiply`` is the element-wise product.
        result.append(S0_csc[:, i].multiply(S1_csc[:, j]))

# Stack the (m, 1) columns side by side: output column i * S1.shape[1] + j
# holds S0[:, i] * S1[:, j], giving an (m, n1 * n2) sparse matrix.
result = sparse.hstack(result).tocsr()
类似问题:
Special kind of row-by-row multiplication of 2 sparse matrices in Python
我试过了:
import numpy as np
from scipy.sparse import random
from scipy import stats
from scipy import sparse
# Full-size operands: 20,000,000 x 100 with density 0.01, converted to CSR
# (test_iter below walks them one row at a time).
S0 = random(20000000,100, density=0.01).tocsr()
S1 = random(20000000,100,density=0.01).tocsr()
def test_iter(A, B):
m,n1 = A.shape
n2 = B.shape[1]
Cshape = (m, n1*n2)
data = np.empty((m,),dtype=object)
col = np.empty((m,),dtype=object)
row = np.empty((m,),dtype=object)
for i,(a,b) in enumerate(zip(A, B)):
data[i] = np.outer(a.data, b.data).flatten()
#col1 = a.indices * np.arange(1,a.nnz+1) # wrong when a isn't dense
col1 = a.indices * n2 # correction
col[i] = (col1[:,None]+b.indices).flatten()
row[i] = np.full((a.nnz*b.nnz,), i)
data = np.concatenate(data)
col = np.concatenate(col)
row = np.concatenate(row)
return sparse.coo_matrix((data,(row,col)),shape=Cshape)
尝试:
%%time
S_result = test_iter(S0,S1)
Wall time: 53min 8s .我们有没有更快的方案,谢谢?
下面是一个重写版本,它直接使用 csr 的 indptr:直接对 data 和 indices 做切片,而不是为每一行新建一个只有 1 行的 csr 矩阵,从而节省时间:
def test_iter2(A, B):
m,n1 = A.shape
n2 = B.shape[1]
Cshape = (m, n1*n2)
data = []
col = []
row = []
for i in range(A.shape[0]):
slc1 = slice(A.indptr[i],A.indptr[i+1])
data1 = A.data[slc1]; ind1 = A.indices[slc1]
slc2 = slice(B.indptr[i],B.indptr[i+1])
data2 = B.data[slc2]; ind2 = B.indices[slc2]
data.append(np.outer(data1, data2).ravel())
col.append(((ind1*n2)[:,None]+ind2).ravel())
row.append(np.full(len(data1)*len(data2), i))
data = np.concatenate(data)
col = np.concatenate(col)
row = np.concatenate(row)
return sparse.coo_matrix((data,(row,col)),shape=Cshape)
对于较小的测试用例,这可以节省相当多的时间:
In [536]: S0=sparse.random(200,200, 0.01, format='csr')
In [537]: S1=sparse.random(200,200, 0.01, format='csr')
In [538]: timeit test_iter(S0,S1)
42.8 ms ± 1.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [539]: timeit test_iter2(S0,S1)
6.94 ms ± 27 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
假设我有两个稀疏矩阵:
from scipy.sparse import random
from scipy import stats
# Small demo operands: two 5000 x 100 random sparse matrices with density 0.01.
S0 = random(5000,100, density=0.01)
S1 = random(5000,100,density=0.01)
我想创建一个稀疏矩阵 S2,其形状为 (5000, 100*100)。(在我的实际应用中,这里的 5000 其实是 2000 万。)S2 的每一行,都是 S0 与 S1 对应行的两个 100 维向量之间的某种交互。
S2 = some_kind_of_tensor_multiplication(S0 ,S1 )
具体来说,S2[i, j] = S0[i, k0] * S1[i, k1],其中 j = k0*100 + k1;让 k0、k1 遍历 [0, 99] 中的所有取值,就得到长度为 10000 的第 i 行。我找不到任何高效的方法来实现这一目标。有人能帮忙吗?
一种朴素的做法大致如下,但我认为它会非常低效:
# Straightforward but slow reference implementation: one element-wise column
# product per (k0, k1) pair, i.e. S0.shape[1] * S1.shape[1] sparse products
# driven from Python.
from scipy import sparse  # local import: ``hstack`` is needed below

# ``scipy.sparse.random`` returns COO matrices by default, and COO does not
# support slicing; CSC makes single-column extraction cheap.
S0_csc = S0.tocsc()
S1_csc = S1.tocsc()

result = []
for i in range(S0_csc.shape[1]):
    for j in range(S1_csc.shape[1]):
        # ``*`` between sparse matrices is the matrix product (a shape error
        # for two column vectors); ``multiply`` is the element-wise product.
        result.append(S0_csc[:, i].multiply(S1_csc[:, j]))

# Stack the (m, 1) columns side by side: output column i * S1.shape[1] + j
# holds S0[:, i] * S1[:, j], giving an (m, n1 * n2) sparse matrix.
result = sparse.hstack(result).tocsr()
类似问题: Special kind of row-by-row multiplication of 2 sparse matrices in Python
我试过了:
import numpy as np
from scipy.sparse import random
from scipy import stats
from scipy import sparse
# Full-size operands: 20,000,000 x 100 with density 0.01, converted to CSR
# (test_iter below walks them one row at a time).
S0 = random(20000000,100, density=0.01).tocsr()
S1 = random(20000000,100,density=0.01).tocsr()
def test_iter(A, B):
m,n1 = A.shape
n2 = B.shape[1]
Cshape = (m, n1*n2)
data = np.empty((m,),dtype=object)
col = np.empty((m,),dtype=object)
row = np.empty((m,),dtype=object)
for i,(a,b) in enumerate(zip(A, B)):
data[i] = np.outer(a.data, b.data).flatten()
#col1 = a.indices * np.arange(1,a.nnz+1) # wrong when a isn't dense
col1 = a.indices * n2 # correction
col[i] = (col1[:,None]+b.indices).flatten()
row[i] = np.full((a.nnz*b.nnz,), i)
data = np.concatenate(data)
col = np.concatenate(col)
row = np.concatenate(row)
return sparse.coo_matrix((data,(row,col)),shape=Cshape)
尝试:
%%time
S_result = test_iter(S0,S1)
Wall time: 53min 8s .我们有没有更快的方案,谢谢?
下面是一个重写版本,它直接使用 csr 的 indptr:直接对 data 和 indices 做切片,而不是为每一行新建一个只有 1 行的 csr 矩阵,从而节省时间:
def test_iter2(A, B):
m,n1 = A.shape
n2 = B.shape[1]
Cshape = (m, n1*n2)
data = []
col = []
row = []
for i in range(A.shape[0]):
slc1 = slice(A.indptr[i],A.indptr[i+1])
data1 = A.data[slc1]; ind1 = A.indices[slc1]
slc2 = slice(B.indptr[i],B.indptr[i+1])
data2 = B.data[slc2]; ind2 = B.indices[slc2]
data.append(np.outer(data1, data2).ravel())
col.append(((ind1*n2)[:,None]+ind2).ravel())
row.append(np.full(len(data1)*len(data2), i))
data = np.concatenate(data)
col = np.concatenate(col)
row = np.concatenate(row)
return sparse.coo_matrix((data,(row,col)),shape=Cshape)
对于较小的测试用例,这可以节省相当多的时间:
In [536]: S0=sparse.random(200,200, 0.01, format='csr')
In [537]: S1=sparse.random(200,200, 0.01, format='csr')
In [538]: timeit test_iter(S0,S1)
42.8 ms ± 1.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [539]: timeit test_iter2(S0,S1)
6.94 ms ± 27 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)