使用张量流计算高斯的拉普拉斯算子
Compute the Laplacian of Gaussian using tensorflow
我找到了一个使用 numpy 和 cv2 (link) 的实现,但我在将此代码转换为 tensorflow 时遇到困难。
numpy实现的代码:
import numpy as np
import cv2
def LoG_numpy(img, sigma=1., kappa=0.75, pad=False):
"""
Applies Laplacian of Gaussians to grayscale image.
:param gray_img: image to apply LoG to
:param sigma: Gauss sigma of Gaussian applied to image, <= 0. for none
:param kappa: difference threshold as factor to mean of image values, <= 0 for none
:param pad: flag to pad output w/ zero border, keeping input image size
"""
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.GaussianBlur(img, (0,) * 2, sigma) if sigma > 0. else img
img = cv2.Laplacian(img, cv2.CV_64F)
rows, cols = img.shape[:2]
# min/max of 3x3-neighbourhoods
min_map = np.minimum.reduce(list(img[r:rows-2+r, c:cols-2+c]
for r in range(3) for c in range(3)))
max_map = np.maximum.reduce(list(img[r:rows-2+r, c:cols-2+c]
for r in range(3) for c in range(3)))
# bool matrix for image value positiv (w/out border pixels)
pos_img = img[1:rows-1, 1:cols-1] > 0.
neg_min = min_map < 0.
neg_min[~pos_img] = 0.
pos_max = max_map > 0.
pos_max[pos_img] = 0.
zero_cross = neg_min + pos_max
# values: max - min, scaled to 0--255; set to 0 for no sign change
value_scale = 255. / max(1., img.max() - img.min())
values = value_scale * (max_map - min_map)
# values[1 - zero_cross] = 0.
values[~zero_cross] = 0.
# optional thresholding
if kappa >= 0.:
thresh = float(np.absolute(img).mean()) * kappa
values[values < thresh] = 0.
log_img = values.astype(np.uint8)
if pad:
log_img = np.pad(log_img, pad_width=1, mode='constant', constant_values=0)
return log_img
还有我转换后的代码:
import tensorflow as tf
import tensorflow_addons as tfa
def LoG_tensorflow(img, sigma=1., kappa=0.75, pad=False):
img = tf.convert_to_tensor(img, dtype='float64')
laplace_kernel = tf.constant([[0., 1., 0.], [1., -4., 1.], [0., 1., 0.]], dtype='float64')
laplace_kernel = laplace_kernel[:, :, tf.newaxis, tf.newaxis]
gray_img = tf.image.rgb_to_grayscale(img)
gray_img = tfa.image.gaussian_filter2d(gray_img, sigma=sigma)
gray_img = gray_img[tf.newaxis, ...]
log = tf.nn.conv2d(gray_img, laplace_kernel, [1, 1, 1, 1], 'SAME')[0]
rows, cols = log.shape[:2]
blocks = []
for r in range(3):
for c in range(3):
block = log[r:rows-2+r, c:cols-2+c]
blocks.append(block)
min_map = blocks[0]
max_map = blocks[0]
for block in blocks[1:]:
min_map = tf.math.minimum(min_map, block)
max_map = tf.math.maximum(max_map, block)
pos_img = log[1:rows-1, 1:cols-1] > 0.
neg_min = tf.cast(min_map < 0., 'uint8')
neg_min = neg_min * tf.cast(pos_img, 'uint8')
pos_max = tf.cast(max_map > 0., 'uint8')
pos_max = pos_max * tf.cast(tf.logical_not(pos_img), 'uint8')
zero_cross = tf.logical_or(tf.cast(neg_min, 'bool'), tf.cast(pos_max, 'bool'))
value_scale = 255. / tf.maximum(1., tf.reduce_max(log) - tf.reduce_min(log))
values = value_scale * (max_map - min_map)
values = values * tf.cast(tf.logical_not(zero_cross), 'float64')
if kappa >= 0.:
thresh = tf.abs(tf.reduce_mean(log)) * kappa
threshed = tf.cast(values > thresh, 'float64')
values = values * threshed
if pad:
values = tf.pad(values, [1, 1], mode='CONSTANT', constant_values=0)
return values
拉普拉斯滤波器值取自 cv2 docs。
img1 = cv2.imread(r"original.jpg")
result1 = LoG_numpy(img1, kappa=5.)
result2 = LoG_tensorflow(img1, kappa=5.)
result2 = result2.numpy().reshape((222, 222)).astype('uint8')
cv2.imshow('original', img1)
cv2.imshow('numpy', result1)
cv2.imshow('tensorflow', result2)
cv2.waitKey(0)
结果看起来不太一样,我找不到原因。
拉普拉斯卷积的结果在 cv2 和 tensorflow 中已经有很大差异,但我真的不明白为什么。有什么想法吗?
Original
LoG_tensorflow
LoG_numpy
所以我发现我的代码有以下错误:
- 我在用拉普拉斯核过滤之前没有填充图像
- 我错误地否定了找到的过零
- 我只在收集平均值后计算过滤图像的绝对值,这意味着平均值始终在 0 左右,因此阈值没有做任何事情
正确的代码如下所示:
from functools import partial
import tensorflow as tf
import tensorflow_addons as tfa
def _pad(image, filter_shape, mode: str = "CONSTANT", constant_values=0):
filter_height, filter_width = filter_shape
pad_top = (filter_height - 1) // 2
pad_bottom = filter_height - 1 - pad_top
pad_left = (filter_width - 1) // 2
pad_right = filter_width - 1 - pad_left
paddings = [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]]
return tf.pad(image, paddings, mode=mode, constant_values=constant_values)
def _zero_crossings(img, kappa=.75):
rows, cols = img.shape[:2]
blocks = []
for r in range(3):
for c in range(3):
block = img[r:rows-2+r, c:cols-2+c]
blocks.append(block)
min_map = blocks[0]
max_map = blocks[0]
for block in blocks[1:]:
min_map = tf.math.minimum(min_map, block)
max_map = tf.math.maximum(max_map, block)
pos_img = img[1:rows-1, 1:cols-1] > 0.
neg_min = tf.cast(min_map < 0., 'uint8')
neg_min = neg_min * tf.cast(pos_img, 'uint8')
pos_max = tf.cast(max_map > 0., 'uint8')
pos_max = pos_max * tf.cast(tf.logical_not(pos_img), 'uint8')
zero_cross = tf.logical_or(tf.cast(neg_min, 'bool'), tf.cast(pos_max, 'bool'))
value_scale = 1. / tf.maximum(1., tf.reduce_max(img) - tf.reduce_min(img))
values = value_scale * (max_map - min_map)
values = values * tf.cast(zero_cross, 'float32')
if kappa >= 0.:
thresh = tf.reduce_mean(tf.abs(img)) * kappa
threshed = tf.cast(tf.logical_not(values < thresh), 'float32')
values = values * threshed
return values
def LoG(img, sigma=1., kappa=0.75):
img = tf.convert_to_tensor(img, dtype='float32')
img = tfa.image.utils.to_4D_image(img)
laplace_kernel = tf.constant([[0., 1., 0.], [1., -4., 1.], [0., 1., 0.]], dtype=img.dtype, name='laplace_kernel')
laplace_kernel = laplace_kernel[:, :, tf.newaxis, tf.newaxis]
gray_img = tf.image.rgb_to_grayscale(img)
gray_img = tfa.image.gaussian_filter2d(gray_img, sigma=sigma)
gray_img = _pad(gray_img, (3, 3), "REFLECT")
log = tf.nn.depthwise_conv2d(gray_img, laplace_kernel, (1, 1, 1, 1), 'VALID')
values = tf.map_fn(partial(_zero_crossings, kappa=kappa), log)
return values
图像现在相似了。
numpy
tensorflow
我找到了一个使用 numpy 和 cv2 (link) 的实现,但我在将此代码转换为 tensorflow 时遇到困难。
numpy实现的代码:
import numpy as np
import cv2
def LoG_numpy(img, sigma=1., kappa=0.75, pad=False):
"""
Applies Laplacian of Gaussians to grayscale image.
:param gray_img: image to apply LoG to
:param sigma: Gauss sigma of Gaussian applied to image, <= 0. for none
:param kappa: difference threshold as factor to mean of image values, <= 0 for none
:param pad: flag to pad output w/ zero border, keeping input image size
"""
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.GaussianBlur(img, (0,) * 2, sigma) if sigma > 0. else img
img = cv2.Laplacian(img, cv2.CV_64F)
rows, cols = img.shape[:2]
# min/max of 3x3-neighbourhoods
min_map = np.minimum.reduce(list(img[r:rows-2+r, c:cols-2+c]
for r in range(3) for c in range(3)))
max_map = np.maximum.reduce(list(img[r:rows-2+r, c:cols-2+c]
for r in range(3) for c in range(3)))
# bool matrix for image value positiv (w/out border pixels)
pos_img = img[1:rows-1, 1:cols-1] > 0.
neg_min = min_map < 0.
neg_min[~pos_img] = 0.
pos_max = max_map > 0.
pos_max[pos_img] = 0.
zero_cross = neg_min + pos_max
# values: max - min, scaled to 0--255; set to 0 for no sign change
value_scale = 255. / max(1., img.max() - img.min())
values = value_scale * (max_map - min_map)
# values[1 - zero_cross] = 0.
values[~zero_cross] = 0.
# optional thresholding
if kappa >= 0.:
thresh = float(np.absolute(img).mean()) * kappa
values[values < thresh] = 0.
log_img = values.astype(np.uint8)
if pad:
log_img = np.pad(log_img, pad_width=1, mode='constant', constant_values=0)
return log_img
还有我转换后的代码:
import tensorflow as tf
import tensorflow_addons as tfa
def LoG_tensorflow(img, sigma=1., kappa=0.75, pad=False):
img = tf.convert_to_tensor(img, dtype='float64')
laplace_kernel = tf.constant([[0., 1., 0.], [1., -4., 1.], [0., 1., 0.]], dtype='float64')
laplace_kernel = laplace_kernel[:, :, tf.newaxis, tf.newaxis]
gray_img = tf.image.rgb_to_grayscale(img)
gray_img = tfa.image.gaussian_filter2d(gray_img, sigma=sigma)
gray_img = gray_img[tf.newaxis, ...]
log = tf.nn.conv2d(gray_img, laplace_kernel, [1, 1, 1, 1], 'SAME')[0]
rows, cols = log.shape[:2]
blocks = []
for r in range(3):
for c in range(3):
block = log[r:rows-2+r, c:cols-2+c]
blocks.append(block)
min_map = blocks[0]
max_map = blocks[0]
for block in blocks[1:]:
min_map = tf.math.minimum(min_map, block)
max_map = tf.math.maximum(max_map, block)
pos_img = log[1:rows-1, 1:cols-1] > 0.
neg_min = tf.cast(min_map < 0., 'uint8')
neg_min = neg_min * tf.cast(pos_img, 'uint8')
pos_max = tf.cast(max_map > 0., 'uint8')
pos_max = pos_max * tf.cast(tf.logical_not(pos_img), 'uint8')
zero_cross = tf.logical_or(tf.cast(neg_min, 'bool'), tf.cast(pos_max, 'bool'))
value_scale = 255. / tf.maximum(1., tf.reduce_max(log) - tf.reduce_min(log))
values = value_scale * (max_map - min_map)
values = values * tf.cast(tf.logical_not(zero_cross), 'float64')
if kappa >= 0.:
thresh = tf.abs(tf.reduce_mean(log)) * kappa
threshed = tf.cast(values > thresh, 'float64')
values = values * threshed
if pad:
values = tf.pad(values, [1, 1], mode='CONSTANT', constant_values=0)
return values
拉普拉斯滤波器值取自 cv2 docs。
img1 = cv2.imread(r"original.jpg")
result1 = LoG_numpy(img1, kappa=5.)
result2 = LoG_tensorflow(img1, kappa=5.)
result2 = result2.numpy().reshape((222, 222)).astype('uint8')
cv2.imshow('original', img1)
cv2.imshow('numpy', result1)
cv2.imshow('tensorflow', result2)
cv2.waitKey(0)
结果看起来不太一样,我找不到原因。 拉普拉斯卷积的结果在 cv2 和 tensorflow 中已经有很大差异,但我真的不明白为什么。有什么想法吗?
Original | LoG_tensorflow | LoG_numpy |
---|---|---|
所以我发现我的代码有以下错误:
- 我在用拉普拉斯核过滤之前没有填充图像
- 我错误地否定了找到的过零
- 我只在收集平均值后计算过滤图像的绝对值,这意味着平均值始终在 0 左右,因此阈值没有做任何事情
正确的代码如下所示:
from functools import partial
import tensorflow as tf
import tensorflow_addons as tfa
def _pad(image, filter_shape, mode: str = "CONSTANT", constant_values=0):
filter_height, filter_width = filter_shape
pad_top = (filter_height - 1) // 2
pad_bottom = filter_height - 1 - pad_top
pad_left = (filter_width - 1) // 2
pad_right = filter_width - 1 - pad_left
paddings = [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]]
return tf.pad(image, paddings, mode=mode, constant_values=constant_values)
def _zero_crossings(img, kappa=.75):
rows, cols = img.shape[:2]
blocks = []
for r in range(3):
for c in range(3):
block = img[r:rows-2+r, c:cols-2+c]
blocks.append(block)
min_map = blocks[0]
max_map = blocks[0]
for block in blocks[1:]:
min_map = tf.math.minimum(min_map, block)
max_map = tf.math.maximum(max_map, block)
pos_img = img[1:rows-1, 1:cols-1] > 0.
neg_min = tf.cast(min_map < 0., 'uint8')
neg_min = neg_min * tf.cast(pos_img, 'uint8')
pos_max = tf.cast(max_map > 0., 'uint8')
pos_max = pos_max * tf.cast(tf.logical_not(pos_img), 'uint8')
zero_cross = tf.logical_or(tf.cast(neg_min, 'bool'), tf.cast(pos_max, 'bool'))
value_scale = 1. / tf.maximum(1., tf.reduce_max(img) - tf.reduce_min(img))
values = value_scale * (max_map - min_map)
values = values * tf.cast(zero_cross, 'float32')
if kappa >= 0.:
thresh = tf.reduce_mean(tf.abs(img)) * kappa
threshed = tf.cast(tf.logical_not(values < thresh), 'float32')
values = values * threshed
return values
def LoG(img, sigma=1., kappa=0.75):
img = tf.convert_to_tensor(img, dtype='float32')
img = tfa.image.utils.to_4D_image(img)
laplace_kernel = tf.constant([[0., 1., 0.], [1., -4., 1.], [0., 1., 0.]], dtype=img.dtype, name='laplace_kernel')
laplace_kernel = laplace_kernel[:, :, tf.newaxis, tf.newaxis]
gray_img = tf.image.rgb_to_grayscale(img)
gray_img = tfa.image.gaussian_filter2d(gray_img, sigma=sigma)
gray_img = _pad(gray_img, (3, 3), "REFLECT")
log = tf.nn.depthwise_conv2d(gray_img, laplace_kernel, (1, 1, 1, 1), 'VALID')
values = tf.map_fn(partial(_zero_crossings, kappa=kappa), log)
return values
图像现在相似了。
numpy | tensorflow |
---|---|