使用 python 对 7 段进行数字识别
Number Recognition on 7 segment using python
我正在使用 python 在 Jupyter notebook 上编写代码,以使用 7segment(FND) 识别设备上的号码。
我用了opencv
得到了图像的边缘
import cv2
import matplotlib.pyplot as plt
def detect_edge(image):
''' function Detecting Edges '''
image_with_edges = cv2.Canny(image , 100, 200)
images = [image , image_with_edges]
location = [121, 122]
for loc, img in zip(location, images):
plt.subplot(loc)
plt.imshow(img, cmap='gray')
plt.savefig('edge.png')
plt.show()
image = cv2.imread('/Users/USER/Desktop/test/test2.png', 0)
detect_edge(image)
这是我从上面的代码中得到的示例输入和输出数据的屏幕截图:
我不确定如何从这里开始。我想得到识别号码
在这种情况下为 51.12。
在 运行 深度学习之前,我是否应该先裁剪数字所在的 FND 部分?
我应该如何从这里开始?
如果您想使用深度学习,一种方法是使用卷积神经网络 (CNN)。您是否首先要裁剪图像取决于您的应用程序。你想从你所附的图片中识别出显示器吗?那么你不应该手动裁剪图像。此外,你需要大量数据来训练你自己的 CNN。
另一种方法是使用现成的光学字符识别引擎,例如 tesseract pytesseract. These are already trained and can achieve good results. I have no experience with detecting 7 segment displays though, so it could be that they do not work for 7 segment displays. They have tried OCR with tesseract for 7 segment displays here: ocr + 7 segment display。
您可以尝试的最后一件事是首先从大图片中检测显示,然后将检测到的裁剪区域提供给 OCR 引擎。
我觉得使用 CNN 来解决这样的问题有点过分了。特别是考虑到这是一个 7 段显示器,我们应该能够解决这个问题而无需诉诸那种复杂性。
您已经标出了角落,所以我假设您可以可靠地裁剪和取消旋转(使其平整)显示。
我们只想获取数字。在这种情况下,我首先转换为 LAB 并在 b 通道上设置阈值。
然后我用opencv的findContours标出了周长:
之后我裁剪出每个单独的数字:
然后我分别查找每个段并根据哪些段处于活动状态确定数量(我对 1 使用了一个特殊情况,我检查了宽度和高度的比率)。
这是我使用的代码(两个文件)
segments.py
import numpy as np
class Segments:
def __init__(self):
# create a 7seg model
self.flags = [];
self.segments = [];
h1 = [[0, 1.0],[0, 0.1]]; # 0
h2 = [[0, 1.0],[0.45, 0.55]]; # 1
h3 = [[0, 1.0],[0.9, 1.0]]; # 2
vl1 = [[0, 0.2],[0, 0.5]]; # 3 # upper-left
vl2 = [[0, 0.2],[0.5, 1.0]]; # 4
vr1 = [[0.8, 1.0],[0, 0.5]]; # 5 # upper-right
vr2 = [[0.8, 1.0], [0.5, 1.0]]; # 6
self.segments.append(h1);
self.segments.append(h2);
self.segments.append(h3);
self.segments.append(vl1);
self.segments.append(vl2);
self.segments.append(vr1);
self.segments.append(vr2);
# process an image and set flags
def digest(self, number):
# reset flags
self.flags = [];
# check res to see if it's a one
h, w = number.shape[:2];
if w < 0.5 * h:
self.flags.append(5);
self.flags.append(6);
return;
# check for segments
for a in range(len(self.segments)):
seg = self.segments[a];
# get bounds
xl, xh = seg[0];
yl, yh = seg[1];
# convert to pix coords
xl = int(xl * w);
xh = int(xh * w);
yl = int(yl * h);
yh = int(yh * h);
sw = xh - xl;
sh = yh - yl;
# check
count = np.count_nonzero(number[yl:yh, xl:xh] == 255);
if count / (sh * sw) > 0.5: # 0.5 is a sensitivity measure
self.flags.append(a);
# returns the stored number (stored in self.flags)
def getNum(self):
# hardcoding outputs
if self.flags == [0,2,3,4,5,6]:
return 0;
if self.flags == [5,6]:
return 1;
if self.flags == [0,1,2,4,5]:
return 2;
if self.flags == [0,1,2,5,6]:
return 3;
if self.flags == [1,3,5,6]:
return 4;
if self.flags == [0,1,2,3,6]:
return 5;
if self.flags == [0,1,2,3,4,6]:
return 6;
if self.flags == [0,5,6]:
return 7;
if self.flags == [0,1,2,3,4,5,6]:
return 8;
if self.flags == [0,1,2,3,5,6]:
return 9;
# ERROR
return -1;
main.py
import cv2
import numpy as np
from segments import Segments
# load image
img = cv2.imread("seg7.jpg");
# crop
img = img[300:800,100:800,:];
# lab
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
l,a,b = cv2.split(lab);
# show
cv2.imshow("orig", img);
# closing operation
kernel = np.ones((5,5), np.uint8);
# threshold params
low = 165;
high = 200;
iters = 3;
# make copy
copy = b.copy();
# threshold
thresh = cv2.inRange(copy, low, high);
# dilate
for a in range(iters):
thresh = cv2.dilate(thresh, kernel);
# erode
for a in range(iters):
thresh = cv2.erode(thresh, kernel);
# show image
cv2.imshow("thresh", thresh);
cv2.imwrite("threshold.jpg", thresh);
# start processing
_, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE);
# draw
for contour in contours:
cv2.drawContours(img, [contour], 0, (0,255,0), 3);
# get res of each number
bounds = [];
h, w = img.shape[:2];
for contour in contours:
left = w;
right = 0;
top = h;
bottom = 0;
for point in contour:
point = point[0];
x, y = point;
if x < left:
left = x;
if x > right:
right = x;
if y < top:
top = y;
if y > bottom:
bottom = y;
tl = [left, top];
br = [right, bottom];
bounds.append([tl, br]);
# crop out each number
cuts = [];
number = 0;
for bound in bounds:
tl, br = bound;
cut_img = thresh[tl[1]:br[1], tl[0]:br[0]];
cuts.append(cut_img);
number += 1;
cv2.imshow(str(number), cut_img);
# font
font = cv2.FONT_HERSHEY_SIMPLEX;
# create a segment model
model = Segments();
index = 0;
for cut in cuts:
# save image
cv2.imwrite(str(index) + "_" + str(number) + ".jpg", cut);
# process
model.digest(cut);
number = model.getNum();
print(number);
cv2.imshow(str(index), cut);
# draw and save again
h, w = cut.shape[:2];
drawn = np.zeros((h, w, 3), np.uint8);
drawn[:, :, 0] = cut;
drawn = cv2.putText(drawn, str(number), (10,30), font, 1, (0,0,255), 2, cv2.LINE_AA);
cv2.imwrite("drawn" + str(index) + "_" + str(number) + ".jpg", drawn);
index += 1;
# cv2.waitKey(0);
# show
cv2.imshow("contours", img);
cv2.imwrite("contours.jpg", img);
cv2.waitKey(0);
我不能保证它总是有效,但只要稍微调整一下就可以使用。如果图像不平坦,请记住取消旋转图像。段模型假设数字大部分是直立的。
我正在使用 python 在 Jupyter notebook 上编写代码,以使用 7segment(FND) 识别设备上的号码。
我用了opencv
得到了图像的边缘
import cv2
import matplotlib.pyplot as plt
def detect_edge(image):
''' function Detecting Edges '''
image_with_edges = cv2.Canny(image , 100, 200)
images = [image , image_with_edges]
location = [121, 122]
for loc, img in zip(location, images):
plt.subplot(loc)
plt.imshow(img, cmap='gray')
plt.savefig('edge.png')
plt.show()
image = cv2.imread('/Users/USER/Desktop/test/test2.png', 0)
detect_edge(image)
这是我从上面的代码中得到的示例输入和输出数据的屏幕截图:
我不确定如何从这里开始。我想得到识别号码 在这种情况下为 51.12。
在 运行 深度学习之前,我是否应该先裁剪数字所在的 FND 部分?
我应该如何从这里开始?
如果您想使用深度学习,一种方法是使用卷积神经网络 (CNN)。您是否首先要裁剪图像取决于您的应用程序。你想从你所附的图片中识别出显示器吗?那么你不应该手动裁剪图像。此外,你需要大量数据来训练你自己的 CNN。
另一种方法是使用现成的光学字符识别引擎,例如 tesseract pytesseract. These are already trained and can achieve good results. I have no experience with detecting 7 segment displays though, so it could be that they do not work for 7 segment displays. They have tried OCR with tesseract for 7 segment displays here: ocr + 7 segment display。
您可以尝试的最后一件事是首先从大图片中检测显示,然后将检测到的裁剪区域提供给 OCR 引擎。
我觉得使用 CNN 来解决这样的问题有点过分了。特别是考虑到这是一个 7 段显示器,我们应该能够解决这个问题而无需诉诸那种复杂性。
您已经标出了角落,所以我假设您可以可靠地裁剪和取消旋转(使其平整)显示。
我们只想获取数字。在这种情况下,我首先转换为 LAB 并在 b 通道上设置阈值。
然后我用opencv的findContours标出了周长:
之后我裁剪出每个单独的数字:
然后我分别查找每个段并根据哪些段处于活动状态确定数量(我对 1 使用了一个特殊情况,我检查了宽度和高度的比率)。
这是我使用的代码(两个文件) segments.py
import numpy as np
class Segments:
def __init__(self):
# create a 7seg model
self.flags = [];
self.segments = [];
h1 = [[0, 1.0],[0, 0.1]]; # 0
h2 = [[0, 1.0],[0.45, 0.55]]; # 1
h3 = [[0, 1.0],[0.9, 1.0]]; # 2
vl1 = [[0, 0.2],[0, 0.5]]; # 3 # upper-left
vl2 = [[0, 0.2],[0.5, 1.0]]; # 4
vr1 = [[0.8, 1.0],[0, 0.5]]; # 5 # upper-right
vr2 = [[0.8, 1.0], [0.5, 1.0]]; # 6
self.segments.append(h1);
self.segments.append(h2);
self.segments.append(h3);
self.segments.append(vl1);
self.segments.append(vl2);
self.segments.append(vr1);
self.segments.append(vr2);
# process an image and set flags
def digest(self, number):
# reset flags
self.flags = [];
# check res to see if it's a one
h, w = number.shape[:2];
if w < 0.5 * h:
self.flags.append(5);
self.flags.append(6);
return;
# check for segments
for a in range(len(self.segments)):
seg = self.segments[a];
# get bounds
xl, xh = seg[0];
yl, yh = seg[1];
# convert to pix coords
xl = int(xl * w);
xh = int(xh * w);
yl = int(yl * h);
yh = int(yh * h);
sw = xh - xl;
sh = yh - yl;
# check
count = np.count_nonzero(number[yl:yh, xl:xh] == 255);
if count / (sh * sw) > 0.5: # 0.5 is a sensitivity measure
self.flags.append(a);
# returns the stored number (stored in self.flags)
def getNum(self):
# hardcoding outputs
if self.flags == [0,2,3,4,5,6]:
return 0;
if self.flags == [5,6]:
return 1;
if self.flags == [0,1,2,4,5]:
return 2;
if self.flags == [0,1,2,5,6]:
return 3;
if self.flags == [1,3,5,6]:
return 4;
if self.flags == [0,1,2,3,6]:
return 5;
if self.flags == [0,1,2,3,4,6]:
return 6;
if self.flags == [0,5,6]:
return 7;
if self.flags == [0,1,2,3,4,5,6]:
return 8;
if self.flags == [0,1,2,3,5,6]:
return 9;
# ERROR
return -1;
main.py
import cv2
import numpy as np
from segments import Segments
# load image
img = cv2.imread("seg7.jpg");
# crop
img = img[300:800,100:800,:];
# lab
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
l,a,b = cv2.split(lab);
# show
cv2.imshow("orig", img);
# closing operation
kernel = np.ones((5,5), np.uint8);
# threshold params
low = 165;
high = 200;
iters = 3;
# make copy
copy = b.copy();
# threshold
thresh = cv2.inRange(copy, low, high);
# dilate
for a in range(iters):
thresh = cv2.dilate(thresh, kernel);
# erode
for a in range(iters):
thresh = cv2.erode(thresh, kernel);
# show image
cv2.imshow("thresh", thresh);
cv2.imwrite("threshold.jpg", thresh);
# start processing
_, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE);
# draw
for contour in contours:
cv2.drawContours(img, [contour], 0, (0,255,0), 3);
# get res of each number
bounds = [];
h, w = img.shape[:2];
for contour in contours:
left = w;
right = 0;
top = h;
bottom = 0;
for point in contour:
point = point[0];
x, y = point;
if x < left:
left = x;
if x > right:
right = x;
if y < top:
top = y;
if y > bottom:
bottom = y;
tl = [left, top];
br = [right, bottom];
bounds.append([tl, br]);
# crop out each number
cuts = [];
number = 0;
for bound in bounds:
tl, br = bound;
cut_img = thresh[tl[1]:br[1], tl[0]:br[0]];
cuts.append(cut_img);
number += 1;
cv2.imshow(str(number), cut_img);
# font
font = cv2.FONT_HERSHEY_SIMPLEX;
# create a segment model
model = Segments();
index = 0;
for cut in cuts:
# save image
cv2.imwrite(str(index) + "_" + str(number) + ".jpg", cut);
# process
model.digest(cut);
number = model.getNum();
print(number);
cv2.imshow(str(index), cut);
# draw and save again
h, w = cut.shape[:2];
drawn = np.zeros((h, w, 3), np.uint8);
drawn[:, :, 0] = cut;
drawn = cv2.putText(drawn, str(number), (10,30), font, 1, (0,0,255), 2, cv2.LINE_AA);
cv2.imwrite("drawn" + str(index) + "_" + str(number) + ".jpg", drawn);
index += 1;
# cv2.waitKey(0);
# show
cv2.imshow("contours", img);
cv2.imwrite("contours.jpg", img);
cv2.waitKey(0);
我不能保证它总是有效,但只要稍微调整一下就可以使用。如果图像不平坦,请记住取消旋转图像。段模型假设数字大部分是直立的。