在 dask.array.map_blocks 中调用 cv2.cvtColor 并返回其结果 [OpenCV, Dask]
call and return cv2.cvtColor in dask.array.map_blocks [OpenCV, Dask]
我尝试使用 dask 并行地将图像从 3 通道转换为 1 通道(彩色转灰度)。之所以这样做,是为了将来能够进行超出内存容量(out-of-core)的计算。我使用的是 da.map_blocks。
from dask.array.image import imread
import dask.array as da
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
im = imread('../datatest/*.JPG')  # wrap around existing images


def showplt(x):
    """Convert the image held in block ``x`` to grayscale.

    ``x`` is one block of ``im`` and ``x[0]`` is the image taken from it.
    The shapes of the input image and of the result are printed so the
    change in dimensionality is visible.
    """
    frame = np.array(x[0])  # materialize once instead of once per use
    # dask's imread defaults to skimage.io.imread, which yields RGB, so the
    # RGB flag is the matching conversion (the BGR flag swaps the R/B weights).
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    print("shape of `x` in showplt:", frame.shape)
    print("shape of `gray` in showplt:", gray.shape)
    return gray


c = im.chunks
print("chunk size of `im`", im.chunks, '\n')
# NOTE: `chunks` below still declares four dimensions although showplt
# returns 2-D blocks; nothing tells dask which axes were removed.
result = im.map_blocks(showplt, dtype=im[0].dtype, chunks=(c[0], c[1], c[2], c[3]))
s = result.compute()
但是我得到了这个错误
chunk size of `im` ((1, 1, 1, 1), (5184,), (3456,), (3,))
shape of `x` in showplt: (5184, 3456, 3)
shape of `gray` in showplt: (5184, 3456)
shape of `x` in showplt: (5184, 3456, 3)
shape of `gray` in showplt: (5184, 3456)
shape of `x` in showplt: (5184, 3456, 3)
shape of `gray` in showplt: (5184, 3456)
shape of `x` in showplt: (5184, 3456, 3)
shape of `gray` in showplt: (5184, 3456)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-177-f86d33eced47> in <module>()
20 print("chunk size of `im`", im.chunks, '\n')
21 result = im.map_blocks(showplt, dtype=im[0].dtype, chunks=(c[0], c[1], c[2], c[3]))
---> 22 s = result.compute()
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/base.py in compute(self, **kwargs)
93 Extra keywords to forward to the scheduler ``get`` function.
94 """
---> 95 (result,) = compute(self, traverse=False, **kwargs)
96 return result
97
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/base.py in compute(*args, **kwargs)
205 return tuple(a if not isinstance(a, Base)
206 else a._finalize(next(results_iter))
--> 207 for a in args)
208
209
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/base.py in <genexpr>(.0)
205 return tuple(a if not isinstance(a, Base)
206 else a._finalize(next(results_iter))
--> 207 for a in args)
208
209
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in finalize(results)
914 while isinstance(results2, (tuple, list)):
915 if len(results2) > 1:
--> 916 return concatenate3(results)
917 else:
918 results2 = results2[0]
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in concatenate3(arrays)
3335 if not arrays:
3336 return np.empty(0)
-> 3337 chunks = chunks_from_arrays(arrays)
3338 shape = tuple(map(sum, chunks))
3339
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in chunks_from_arrays(arrays)
3240
3241 while isinstance(arrays, (list, tuple)):
-> 3242 result.append(tuple([shape(deepfirst(a))[dim] for a in arrays]))
3243 arrays = arrays[0]
3244 dim += 1
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in <listcomp>(.0)
3240
3241 while isinstance(arrays, (list, tuple)):
-> 3242 result.append(tuple([shape(deepfirst(a))[dim] for a in arrays]))
3243 arrays = arrays[0]
3244 dim += 1
IndexError: tuple index out of range
我还尝试把 map_blocks 的 chunks 参数改为:
# Three chunk dimensions for the 4-D `im`, with no drop_axis given.
result = im.map_blocks(showplt, dtype=im[0].dtype, chunks=(c[0], c[1], c[2]))
但是没有成功
chunk size of `im` ((1, 1, 1, 1), (5184,), (3456,), (3,))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-178-7b668f779a32> in <module>()
19 c = im.chunks
20 print("chunk size of `im`", im.chunks, '\n')
---> 21 result = im.map_blocks(showplt, dtype=im[0].dtype, chunks=(c[0], c[1], c[2]))
22 s = result.compute()
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in map_blocks(self, func, *args, **kwargs)
1568 @wraps(map_blocks)
1569 def map_blocks(self, func, *args, **kwargs):
-> 1570 return map_blocks(func, self, *args, **kwargs)
1571
1572 def map_overlap(self, func, depth, boundary=None, trim=True, **kwargs):
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in map_blocks(func, *args, **kwargs)
679 if len(chunks) != len(numblocks):
680 raise ValueError("Provided chunks have {0} dims, expected {1} "
--> 681 "dims.".format(len(chunks), len(numblocks)))
682 chunks2 = []
683 for i, (c, nb) in enumerate(zip(chunks, numblocks)):
ValueError: Provided chunks have 3 dims, expected 4 dims.
如何指定块大小??
当您的函数会改变底层 NumPy 数组的形状时,map_blocks 方法用起来就比较棘手。我认为您通过指定 chunks 来解决问题的思路是对的,但还需要用 drop_axis 指明哪些维度被删除了。
In [1]: import dask.array as da
In [2]: x = da.ones((5, 5, 5), chunks=(5, 2, 2))
In [3]: x.map_blocks(lambda x: x[0, :, :], drop_axis=0)
Out[3]: dask.array<lambda, shape=(5, 5), dtype=float64, chunksize=(2, 2)>
终于找到窍门了。因为第 0 轴上有 4 个块(每张图像一个块),直接使用 drop_axis=0 会报错:
ValueError: Can't drop an axis with more than 1 block. Please use `atop` instead.
为了让它能够运行,我使用了 drop_axis=[1,3] 和 chunks=(im.shape[1], im.shape[2]):
from dask.array.image import imread
import dask.array as da
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
im = imread('../datatest/*.JPG')  # wrap around existing images


def showplt(x):
    """Return the grayscale version of the image taken from block ``x``."""
    # dask's imread defaults to skimage.io.imread (RGB order), so convert
    # with the RGB flag; the BGR flag would swap the red/blue weights.
    return cv2.cvtColor(x[0], cv2.COLOR_RGB2GRAY)


c = im.chunks
# Each block yields one 2-D (height, width) array. With these arguments dask
# keeps one block per image along the first output axis, so the images end up
# stacked vertically in a single 2-D array.
result = im.map_blocks(showplt, dtype=im.dtype, chunks=(im.shape[1], im.shape[2]), drop_axis=[1,3])
print(result)
plt.imshow(result, cmap='gray')
但是得到的是一张把所有图像沿垂直方向拼接在一起的图像:
dask.array<showplt, shape=(20736, 3456), dtype=uint8, chunksize=(5184, 3456)>
Out[10]:
<matplotlib.image.AxesImage at 0x7f10c461e7b8>
为了能像 imread 的返回值那样按图像逐张索引,我需要对 result 做 reshape:
# Split the stacked rows back into one (height, width) plane per image;
# the first three entries of im.shape are (images, height, width).
reshape = result.reshape(im.shape[:3])
plt.imshow(reshape[0], cmap='gray')
结果
dask.array<reshape, shape=(4, 5184, 3456), dtype=uint8, chunksize=(1, 5184, 3456)>
<matplotlib.image.AxesImage at 0x7f10c479e668>
我尝试使用 dask 并行地将图像从 3 通道转换为 1 通道(彩色转灰度)。之所以这样做,是为了将来能够进行超出内存容量(out-of-core)的计算。我使用的是 da.map_blocks。
from dask.array.image import imread
import dask.array as da
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
im = imread('../datatest/*.JPG')  # wrap around existing images


def showplt(x):
    """Convert the image held in block ``x`` to grayscale.

    ``x`` is one block of ``im`` and ``x[0]`` is the image taken from it.
    The shapes of the input image and of the result are printed so the
    change in dimensionality is visible.
    """
    frame = np.array(x[0])  # materialize once instead of once per use
    # dask's imread defaults to skimage.io.imread, which yields RGB, so the
    # RGB flag is the matching conversion (the BGR flag swaps the R/B weights).
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    print("shape of `x` in showplt:", frame.shape)
    print("shape of `gray` in showplt:", gray.shape)
    return gray


c = im.chunks
print("chunk size of `im`", im.chunks, '\n')
# NOTE: `chunks` below still declares four dimensions although showplt
# returns 2-D blocks; nothing tells dask which axes were removed.
result = im.map_blocks(showplt, dtype=im[0].dtype, chunks=(c[0], c[1], c[2], c[3]))
s = result.compute()
但是我得到了这个错误
chunk size of `im` ((1, 1, 1, 1), (5184,), (3456,), (3,))
shape of `x` in showplt: (5184, 3456, 3)
shape of `gray` in showplt: (5184, 3456)
shape of `x` in showplt: (5184, 3456, 3)
shape of `gray` in showplt: (5184, 3456)
shape of `x` in showplt: (5184, 3456, 3)
shape of `gray` in showplt: (5184, 3456)
shape of `x` in showplt: (5184, 3456, 3)
shape of `gray` in showplt: (5184, 3456)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-177-f86d33eced47> in <module>()
20 print("chunk size of `im`", im.chunks, '\n')
21 result = im.map_blocks(showplt, dtype=im[0].dtype, chunks=(c[0], c[1], c[2], c[3]))
---> 22 s = result.compute()
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/base.py in compute(self, **kwargs)
93 Extra keywords to forward to the scheduler ``get`` function.
94 """
---> 95 (result,) = compute(self, traverse=False, **kwargs)
96 return result
97
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/base.py in compute(*args, **kwargs)
205 return tuple(a if not isinstance(a, Base)
206 else a._finalize(next(results_iter))
--> 207 for a in args)
208
209
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/base.py in <genexpr>(.0)
205 return tuple(a if not isinstance(a, Base)
206 else a._finalize(next(results_iter))
--> 207 for a in args)
208
209
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in finalize(results)
914 while isinstance(results2, (tuple, list)):
915 if len(results2) > 1:
--> 916 return concatenate3(results)
917 else:
918 results2 = results2[0]
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in concatenate3(arrays)
3335 if not arrays:
3336 return np.empty(0)
-> 3337 chunks = chunks_from_arrays(arrays)
3338 shape = tuple(map(sum, chunks))
3339
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in chunks_from_arrays(arrays)
3240
3241 while isinstance(arrays, (list, tuple)):
-> 3242 result.append(tuple([shape(deepfirst(a))[dim] for a in arrays]))
3243 arrays = arrays[0]
3244 dim += 1
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in <listcomp>(.0)
3240
3241 while isinstance(arrays, (list, tuple)):
-> 3242 result.append(tuple([shape(deepfirst(a))[dim] for a in arrays]))
3243 arrays = arrays[0]
3244 dim += 1
IndexError: tuple index out of range
我还尝试把 map_blocks 的 chunks 参数改为:
# Three chunk dimensions for the 4-D `im`, with no drop_axis given.
result = im.map_blocks(showplt, dtype=im[0].dtype, chunks=(c[0], c[1], c[2]))
但是没有成功
chunk size of `im` ((1, 1, 1, 1), (5184,), (3456,), (3,))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-178-7b668f779a32> in <module>()
19 c = im.chunks
20 print("chunk size of `im`", im.chunks, '\n')
---> 21 result = im.map_blocks(showplt, dtype=im[0].dtype, chunks=(c[0], c[1], c[2]))
22 s = result.compute()
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in map_blocks(self, func, *args, **kwargs)
1568 @wraps(map_blocks)
1569 def map_blocks(self, func, *args, **kwargs):
-> 1570 return map_blocks(func, self, *args, **kwargs)
1571
1572 def map_overlap(self, func, depth, boundary=None, trim=True, **kwargs):
/home/sendowo/Projects/non-text_segmentation/env/lib/python3.5/site-packages/dask/array/core.py in map_blocks(func, *args, **kwargs)
679 if len(chunks) != len(numblocks):
680 raise ValueError("Provided chunks have {0} dims, expected {1} "
--> 681 "dims.".format(len(chunks), len(numblocks)))
682 chunks2 = []
683 for i, (c, nb) in enumerate(zip(chunks, numblocks)):
ValueError: Provided chunks have 3 dims, expected 4 dims.
如何指定块大小??
当您的函数会改变底层 NumPy 数组的形状时,map_blocks 方法用起来就比较棘手。我认为您通过指定 chunks 来解决问题的思路是对的,但还需要用 drop_axis 指明哪些维度被删除了。
In [1]: import dask.array as da
In [2]: x = da.ones((5, 5, 5), chunks=(5, 2, 2))
In [3]: x.map_blocks(lambda x: x[0, :, :], drop_axis=0)
Out[3]: dask.array<lambda, shape=(5, 5), dtype=float64, chunksize=(2, 2)>
终于找到窍门了。因为第 0 轴上有 4 个块(每张图像一个块),直接使用 drop_axis=0 会报错:
ValueError: Can't drop an axis with more than 1 block. Please use `atop` instead.
为了让它能够运行,我使用了 drop_axis=[1,3] 和 chunks=(im.shape[1], im.shape[2]):
from dask.array.image import imread
import dask.array as da
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
im = imread('../datatest/*.JPG')  # wrap around existing images


def showplt(x):
    """Return the grayscale version of the image taken from block ``x``."""
    # dask's imread defaults to skimage.io.imread (RGB order), so convert
    # with the RGB flag; the BGR flag would swap the red/blue weights.
    return cv2.cvtColor(x[0], cv2.COLOR_RGB2GRAY)


c = im.chunks
# Each block yields one 2-D (height, width) array. With these arguments dask
# keeps one block per image along the first output axis, so the images end up
# stacked vertically in a single 2-D array.
result = im.map_blocks(showplt, dtype=im.dtype, chunks=(im.shape[1], im.shape[2]), drop_axis=[1,3])
print(result)
plt.imshow(result, cmap='gray')
但是得到的是一张把所有图像沿垂直方向拼接在一起的图像:
dask.array<showplt, shape=(20736, 3456), dtype=uint8, chunksize=(5184, 3456)>
Out[10]:
<matplotlib.image.AxesImage at 0x7f10c461e7b8>
为了能像 imread 的返回值那样按图像逐张索引,我需要对 result 做 reshape:
# Split the stacked rows back into one (height, width) plane per image;
# the first three entries of im.shape are (images, height, width).
reshape = result.reshape(im.shape[:3])
plt.imshow(reshape[0], cmap='gray')
结果
dask.array<reshape, shape=(4, 5184, 3456), dtype=uint8, chunksize=(1, 5184, 3456)>
<matplotlib.image.AxesImage at 0x7f10c479e668>