Why is Dask returning None on a CUDA function?
I am trying to run my CUDA function through Dask, but what Dask returns is a NoneType object.
from numba import cuda
import numpy as np
from dask.distributed import Client, LocalCluster

@cuda.jit()
def addingNumbersCUDA(big_array, big_array2, save_array):
    i = cuda.grid(1)
    if i < big_array.shape[0]:
        for j in range(big_array.shape[1]):
            save_array[i][j] = big_array[i][j] * big_array2[i][j]

if __name__ == "__main__":
    cluster = LocalCluster()
    client = Client(cluster)

    big_array = np.random.random_sample((100, 3000))
    big_array2 = np.random.random_sample((100, 3000))
    save_array = np.zeros(shape=(100, 3000))

    arraysize = 100
    threadsperblock = 64
    blockspergrid = (arraysize + (threadsperblock - 1))

    x = client.submit(addingNumbersCUDA[blockspergrid, threadsperblock], big_array, big_array2, save_array)
    y = client.gather(x)
    print(y)
I know that you don't actually return anything from a CUDA kernel and that the result is written back into the array you pass in. Is that why I am getting the NoneType, or am I using Dask with CUDA the wrong way?
As Matthew Rocklin points out in this question, Dask does not hand back the result of an in-place operation: the submitted kernel only writes into the worker's copy of save_array and itself returns nothing, which is exactly the None that gather shows. To get around this, it is best to add an extra function that handles the GPU code and returns the result.
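The same behaviour can be reproduced without any GPU code at all; a minimal sketch (fill_in_place is a hypothetical name, only numpy and dask.distributed are assumed):

import numpy as np
from dask.distributed import Client, LocalCluster

def fill_in_place(arr):
    # writes into the worker's copy of arr and returns nothing
    arr[:] = 1.0

if __name__ == "__main__":
    client = Client(LocalCluster())
    a = np.zeros(3)
    fut = client.submit(fill_in_place, a)
    print(client.gather(fut))  # None: the task's return value is None
    print(a)                   # still all zeros: the worker modified its own copy

The reworked version below applies the fix to the GPU case: a wrapper copies the device result back to the host and returns it, so the future resolves to the array instead of None.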
from numba import cuda
import numpy as np
from dask.distributed import Client, LocalCluster

@cuda.jit()
def addingNumbersCUDA(big_array, big_array2, save_array):
    i = cuda.grid(1)
    if i < big_array.shape[0]:
        for j in range(big_array.shape[1]):
            save_array[i][j] = big_array[i][j] * big_array2[i][j]

def toCUDA(big_array, big_array2, save_array):
    arraysize = 100
    threadsperblock = 64
    # ceil-divide so the grid covers every row
    blockspergrid = (arraysize + (threadsperblock - 1)) // threadsperblock

    # copy the inputs to the device, run the kernel, then copy the result
    # back to the host and return it so Dask has something to hand back
    d_big_array = cuda.to_device(big_array)
    d_big_array2 = cuda.to_device(big_array2)
    d_save_array = cuda.to_device(save_array)

    addingNumbersCUDA[blockspergrid, threadsperblock](d_big_array, d_big_array2, d_save_array)

    save_array = d_save_array.copy_to_host()
    return save_array

if __name__ == "__main__":
    cluster = LocalCluster()
    client = Client(cluster)

    big_array = np.random.random_sample((100, 3000))
    big_array2 = np.random.random_sample((100, 3000))
    save_array = np.zeros(shape=(100, 3000))

    x = client.submit(toCUDA, big_array, big_array2, save_array)
    y = client.gather(x)
    print(y)
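With the wrapper in place, gather returns the host array produced by copy_to_host rather than None. As a quick sanity check that could be run after gathering (a sketch that assumes the kernel above, which multiplies element-wise despite its name):

# hypothetical check: the gathered result should match the NumPy equivalent
assert y is not None
assert np.allclose(y, big_array * big_array2)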