如何正确扭曲 python 中的文档图像?
How to warp a document image in Python correctly?
通过使用这个link,我制作了变形网格:
# Build a regular vertex grid over a downscaled document image and perturb it
# with a random number of smooth deformations. Result: `perturbed_mesh`, an
# (mr*mc, 2) array holding one (x, y) row per vertex.
inputs = cv2.imread("../datasets/images/0.jpg")
if inputs is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("../datasets/images/0.jpg")
nh, nw = inputs.shape[0]//8, inputs.shape[1]//8
# cv2.resize takes dsize as (width, height); passing (nh, nw) would produce
# an image whose shape is (nw, nh) and no longer matches the mesh below.
inputs = cv2.resize(inputs, dsize=(nw, nh), interpolation=cv2.INTER_AREA)

mr = nh
mc = nw

# Row coordinates run from mr-1 down to 0, column coordinates from 0 to mc-1.
xx = np.arange(mr-1, -1, -1)
yy = np.arange(0, mc, 1)
[Y, X] = np.meshgrid(xx, yy)
# Stack the flattened coordinates into one (x, y) row per vertex.
ms = np.transpose(np.asarray([X.flatten('F'), Y.flatten('F')]), (1,0))

perturbed_mesh = ms
# Number of deformations to apply; the value can be -1 or 0, in which case
# the loop body never runs and the mesh stays regular.
nv = np.random.randint(20) - 1
for k in range(nv):
    # Choosing one vertex randomly
    vidx = np.random.randint(np.shape(ms)[0])
    vtex = ms[vidx, :]
    # Vector between all vertices and the selected one
    xv = perturbed_mesh - vtex
    # Random movement, each component drawn from [-10, 10)
    mv = (np.random.rand(1,2) - 0.5)*20
    # Pad both vector sets with a zero z-component so that the z-component
    # of their cross product is the 2-D cross product xv x mv.
    hxv = np.zeros((np.shape(xv)[0], np.shape(xv)[1] +1) )
    hxv[:, :-1] = xv
    hmv = np.tile(np.append(mv, 0), (np.shape(xv)[0],1))
    d = np.cross(hxv, hmv)
    d = np.absolute(d[:, 2])
    # |xv x mv| / |mv|: distance of every vertex from the line through the
    # chosen vertex along the movement direction.
    d = d / (np.linalg.norm(mv, ord=2))
    wt = d
    # Pick one of two distance-to-weight curves at random.
    curve_type = np.random.rand(1)
    if curve_type > 0.3:
        alpha = np.random.rand(1) * 50 + 50
        wt = alpha / (wt + alpha)
    else:
        alpha = np.random.rand(1) + 1
        wt = 1 - (wt / 100 )**alpha
    # Scale the movement per vertex by its weight and displace the mesh.
    msmv = mv * np.expand_dims(wt, axis=1)
    perturbed_mesh = perturbed_mesh + msmv
所以我得到了这样的网格:
然后我尝试将源图像像素映射到生成的网格上。
# Warp the document by sampling the padded source image at the perturbed
# mesh coordinates.
# Pad the source (dh rows top/bottom, dw columns left/right) with black.
img = cv2.copyMakeBorder(inputs, dh, dh, dw, dw, borderType=cv2.BORDER_CONSTANT, value=(0,0,0))
xs, ys = perturbed_mesh[:, 0], perturbed_mesh[:, 1]
# cv2.remap reads dst(row, col) from img at (xs[row, col], ys[row, col]).
# The mesh is expressed in the coordinates of the unpadded image, so it must
# be shifted by the border widths; without the shift the sampling window
# covers the top-left corner of the padded image and the document ends up
# in a corner of the result.
xs = (xs.reshape(nh, nw) + dw).astype(np.float32)
ys = (ys.reshape(nh, nw) + dh).astype(np.float32)
dst = cv2.remap(img, xs, ys, cv2.INTER_CUBIC)
plt.imshow(dst)
终于得到结果了:
但是文档出现在结果图像的角落里,这样的结果我无法使用。
如何将文档映射到图像中心?
这是我在 Python/OpenCV 中做的一个透视变换示例,它展示了如何得到扩展视野的输出。我不仅增大了输出尺寸,还平移了输出控制点:控制点平移 +500 像素,输出的宽和高则各增加其两倍,即 +1000。
输入:
未扩展的情况:
# Perspective-warp a quadrilateral of building.jpg onto the full output
# frame (no extra margin around the result).
import numpy as np
import cv2

# read input
img = cv2.imread("building.jpg")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("building.jpg")

# resize
height,width = 1000,1500
img = cv2.resize(img, (width,height))

# source quadrilateral (pts1) and the output rectangle it maps to (pts2),
# listed as top-left, top-right, bottom-right, bottom-left; here the target
# is the whole output frame, with no shift
pts1 = np.float32([[ 250, 0],[1220, 300],[1300, 770],[ 250, 860]])
pts2 = np.float32([[0,0],[width,0],[width,height],[0,height]])

# compute perspective matrix
matrix = cv2.getPerspectiveTransform(pts1,pts2)
print(matrix.shape)
print(matrix)

# convert image to BGRA with opaque alpha
img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

# do perspective transformation into an output of the same size as the input;
# the interpolation mode must be passed as flags= because the fourth
# positional parameter of warpPerspective is dst, not flags
# NOTE(review): the 3-element borderValue presumably leaves alpha at 0, so
# the area outside the input becomes transparent - confirm
imgOutput = cv2.warpPerspective(img, matrix, (width,height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))

# resize output to (width, height); the size is already the same in this case
imgOutput = cv2.resize(imgOutput, (width,height))

# save the warped output
cv2.imwrite("building_warped_unexpanded.png", imgOutput)

# show the result
cv2.imshow("result", imgOutput)
cv2.waitKey(0)
cv2.destroyAllWindows()
未扩展时的变形结果:
扩展案例:
# Perspective-warp a quadrilateral of building.jpg into an output frame that
# is enlarged by 500 pixels on every side, so content outside the
# quadrilateral stays visible.
import numpy as np
import cv2

# read input
img = cv2.imread("building.jpg")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("building.jpg")

# resize
height,width = 1000,1500
img = cv2.resize(img, (width,height))

# source quadrilateral (pts1) and target rectangle (pts2), listed as
# top-left, top-right, bottom-right, bottom-left; the target is shifted by
# +500 in x and y to leave a margin on the left and top
pts1 = np.float32([[ 250, 0],[1220, 300],[1300, 770],[ 250, 860]])
pts2 = np.float32([[+500,+500],[width+500,+500],[width+500,height+500],[+500,height+500]])

# compute perspective matrix
matrix = cv2.getPerspectiveTransform(pts1,pts2)
print(matrix.shape)
print(matrix)

# convert image to BGRA with opaque alpha
img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

# do perspective transformation into an output extended by 500 all around
# (+1000 in width and height); the interpolation mode must be passed as
# flags= because the fourth positional parameter of warpPerspective is dst
# NOTE(review): the 3-element borderValue presumably leaves alpha at 0, so
# the area outside the input becomes transparent - confirm
imgOutput = cv2.warpPerspective(img, matrix, (width+1000,height+1000), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))

# resize output, since it is too large to post
imgOutput = cv2.resize(imgOutput, (width,height))

# save the warped output
cv2.imwrite("building_warped.jpg", imgOutput)

# show the result
cv2.imshow("result", imgOutput)
cv2.waitKey(0)
cv2.destroyAllWindows()
扩展结果:
通过使用这个link,我制作了变形网格:
# Build a regular vertex grid over a downscaled document image and perturb it
# with a random number of smooth deformations. Result: `perturbed_mesh`, an
# (mr*mc, 2) array holding one (x, y) row per vertex.
inputs = cv2.imread("../datasets/images/0.jpg")
if inputs is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("../datasets/images/0.jpg")
nh, nw = inputs.shape[0]//8, inputs.shape[1]//8
# cv2.resize takes dsize as (width, height); passing (nh, nw) would produce
# an image whose shape is (nw, nh) and no longer matches the mesh below.
inputs = cv2.resize(inputs, dsize=(nw, nh), interpolation=cv2.INTER_AREA)

mr = nh
mc = nw

# Row coordinates run from mr-1 down to 0, column coordinates from 0 to mc-1.
xx = np.arange(mr-1, -1, -1)
yy = np.arange(0, mc, 1)
[Y, X] = np.meshgrid(xx, yy)
# Stack the flattened coordinates into one (x, y) row per vertex.
ms = np.transpose(np.asarray([X.flatten('F'), Y.flatten('F')]), (1,0))

perturbed_mesh = ms
# Number of deformations to apply; the value can be -1 or 0, in which case
# the loop body never runs and the mesh stays regular.
nv = np.random.randint(20) - 1
for k in range(nv):
    # Choosing one vertex randomly
    vidx = np.random.randint(np.shape(ms)[0])
    vtex = ms[vidx, :]
    # Vector between all vertices and the selected one
    xv = perturbed_mesh - vtex
    # Random movement, each component drawn from [-10, 10)
    mv = (np.random.rand(1,2) - 0.5)*20
    # Pad both vector sets with a zero z-component so that the z-component
    # of their cross product is the 2-D cross product xv x mv.
    hxv = np.zeros((np.shape(xv)[0], np.shape(xv)[1] +1) )
    hxv[:, :-1] = xv
    hmv = np.tile(np.append(mv, 0), (np.shape(xv)[0],1))
    d = np.cross(hxv, hmv)
    d = np.absolute(d[:, 2])
    # |xv x mv| / |mv|: distance of every vertex from the line through the
    # chosen vertex along the movement direction.
    d = d / (np.linalg.norm(mv, ord=2))
    wt = d
    # Pick one of two distance-to-weight curves at random.
    curve_type = np.random.rand(1)
    if curve_type > 0.3:
        alpha = np.random.rand(1) * 50 + 50
        wt = alpha / (wt + alpha)
    else:
        alpha = np.random.rand(1) + 1
        wt = 1 - (wt / 100 )**alpha
    # Scale the movement per vertex by its weight and displace the mesh.
    msmv = mv * np.expand_dims(wt, axis=1)
    perturbed_mesh = perturbed_mesh + msmv
所以我得到了这样的网格:
然后我尝试将源图像像素映射到生成的网格上。
# Warp the document by sampling the padded source image at the perturbed
# mesh coordinates.
# Pad the source (dh rows top/bottom, dw columns left/right) with black.
img = cv2.copyMakeBorder(inputs, dh, dh, dw, dw, borderType=cv2.BORDER_CONSTANT, value=(0,0,0))
xs, ys = perturbed_mesh[:, 0], perturbed_mesh[:, 1]
# cv2.remap reads dst(row, col) from img at (xs[row, col], ys[row, col]).
# The mesh is expressed in the coordinates of the unpadded image, so it must
# be shifted by the border widths; without the shift the sampling window
# covers the top-left corner of the padded image and the document ends up
# in a corner of the result.
xs = (xs.reshape(nh, nw) + dw).astype(np.float32)
ys = (ys.reshape(nh, nw) + dh).astype(np.float32)
dst = cv2.remap(img, xs, ys, cv2.INTER_CUBIC)
plt.imshow(dst)
终于得到结果了:
但是文档出现在结果图像的角落里,这样的结果我无法使用。
如何将文档映射到图像中心?
这是我在 Python/OpenCV 中做的一个透视变换示例,它展示了如何得到扩展视野的输出。我不仅增大了输出尺寸,还平移了输出控制点:控制点平移 +500 像素,输出的宽和高则各增加其两倍,即 +1000。
输入:
未扩展的情况:
# Perspective-warp a quadrilateral of building.jpg onto the full output
# frame (no extra margin around the result).
import numpy as np
import cv2

# read input
img = cv2.imread("building.jpg")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("building.jpg")

# resize
height,width = 1000,1500
img = cv2.resize(img, (width,height))

# source quadrilateral (pts1) and the output rectangle it maps to (pts2),
# listed as top-left, top-right, bottom-right, bottom-left; here the target
# is the whole output frame, with no shift
pts1 = np.float32([[ 250, 0],[1220, 300],[1300, 770],[ 250, 860]])
pts2 = np.float32([[0,0],[width,0],[width,height],[0,height]])

# compute perspective matrix
matrix = cv2.getPerspectiveTransform(pts1,pts2)
print(matrix.shape)
print(matrix)

# convert image to BGRA with opaque alpha
img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

# do perspective transformation into an output of the same size as the input;
# the interpolation mode must be passed as flags= because the fourth
# positional parameter of warpPerspective is dst, not flags
# NOTE(review): the 3-element borderValue presumably leaves alpha at 0, so
# the area outside the input becomes transparent - confirm
imgOutput = cv2.warpPerspective(img, matrix, (width,height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))

# resize output to (width, height); the size is already the same in this case
imgOutput = cv2.resize(imgOutput, (width,height))

# save the warped output
cv2.imwrite("building_warped_unexpanded.png", imgOutput)

# show the result
cv2.imshow("result", imgOutput)
cv2.waitKey(0)
cv2.destroyAllWindows()
未扩展时的变形结果:
扩展案例:
# Perspective-warp a quadrilateral of building.jpg into an output frame that
# is enlarged by 500 pixels on every side, so content outside the
# quadrilateral stays visible.
import numpy as np
import cv2

# read input
img = cv2.imread("building.jpg")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("building.jpg")

# resize
height,width = 1000,1500
img = cv2.resize(img, (width,height))

# source quadrilateral (pts1) and target rectangle (pts2), listed as
# top-left, top-right, bottom-right, bottom-left; the target is shifted by
# +500 in x and y to leave a margin on the left and top
pts1 = np.float32([[ 250, 0],[1220, 300],[1300, 770],[ 250, 860]])
pts2 = np.float32([[+500,+500],[width+500,+500],[width+500,height+500],[+500,height+500]])

# compute perspective matrix
matrix = cv2.getPerspectiveTransform(pts1,pts2)
print(matrix.shape)
print(matrix)

# convert image to BGRA with opaque alpha
img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

# do perspective transformation into an output extended by 500 all around
# (+1000 in width and height); the interpolation mode must be passed as
# flags= because the fourth positional parameter of warpPerspective is dst
# NOTE(review): the 3-element borderValue presumably leaves alpha at 0, so
# the area outside the input becomes transparent - confirm
imgOutput = cv2.warpPerspective(img, matrix, (width+1000,height+1000), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))

# resize output, since it is too large to post
imgOutput = cv2.resize(imgOutput, (width,height))

# save the warped output
cv2.imwrite("building_warped.jpg", imgOutput)

# show the result
cv2.imshow("result", imgOutput)
cv2.waitKey(0)
cv2.destroyAllWindows()
扩展结果: