使用 Python 识别 7 段数码管上的数字

Number Recognition on 7 segment using python

我正在 Jupyter notebook 中用 Python 编写代码,以识别设备的 7 段数码管 (FND) 上显示的数字。

我用了opencv得到了图像的边缘

import cv2
import matplotlib.pyplot as plt 

def detect_edge(image, low=100, high=200, out_path='edge.png'):
    """Run Canny edge detection and plot the input beside its edge map.

    The two-panel figure is saved to ``out_path`` and then shown.

    Args:
        image: Image array as returned by ``cv2.imread``.
        low: Lower hysteresis threshold passed to ``cv2.Canny``.
        high: Upper hysteresis threshold passed to ``cv2.Canny``.
        out_path: File the figure is written to.

    Raises:
        ValueError: If ``image`` is None, which is what ``cv2.imread``
            returns when the file cannot be read.
    """
    # Fail early with a readable message instead of an opaque OpenCV error.
    if image is None:
        raise ValueError("image is None - check the path given to cv2.imread")

    image_with_edges = cv2.Canny(image, low, high)

    # 121 / 122: one row, two columns, left and right panel.
    for position, panel in ((121, image), (122, image_with_edges)):
        plt.subplot(position)
        plt.imshow(panel, cmap='gray')

    plt.savefig(out_path)
    plt.show()

# Read the test picture as single-channel grayscale and plot it next to its edges.
image = cv2.imread('/Users/USER/Desktop/test/test2.png', cv2.IMREAD_GRAYSCALE)
detect_edge(image)

这是我从上面的代码中得到的示例输入和输出数据的屏幕截图:

我不确定接下来该怎么做。我想识别出显示的数字,在本例中为 51.12。

在运行深度学习之前,我是否应该先裁剪出数字所在的 FND 部分?

我应该如何从这里开始?

如果您想使用深度学习,一种方法是使用卷积神经网络 (CNN)。您是否首先要裁剪图像取决于您的应用程序。你想从你所附的图片中识别出显示器吗?那么你不应该手动裁剪图像。此外,你需要大量数据来训练你自己的 CNN。

另一种方法是使用现成的光学字符识别 (OCR) 引擎,例如 tesseract 或 pytesseract。这些引擎已经过训练,可以取得不错的效果。不过我没有用它们识别 7 段显示器的经验,所以它们可能并不适用于 7 段显示器。这里有人尝试过用 tesseract 对 7 段显示器做 OCR:ocr + 7 segment display

您可以尝试的最后一种方法是:先从整张图片中检测出显示屏,然后将裁剪出的检测区域交给 OCR 引擎。

我觉得使用 CNN 来解决这样的问题有点过分了。特别是考虑到这是一个 7 段显示器,我们应该能够解决这个问题而无需诉诸那种复杂性。

您已经标出了角点,所以我假设您能够可靠地裁剪显示屏并校正其旋转(使其摆正)。

我们只想获取数字。在这种情况下,我首先转换为 LAB 并在 b 通道上设置阈值。

然后我用opencv的findContours标出了周长:

之后我裁剪出每个单独的数字:

然后我分别检查每个段,并根据哪些段处于点亮状态来确定数字(对于 1 我使用了特殊处理:检查宽高比)。

这是我使用的代码(两个文件) segments.py

import numpy as np

class Segments:
    """Decode one seven-segment digit from a binary crop of that digit.

    Each segment is a rectangular window given as
    ``[[x_low, x_high], [y_low, y_high]]`` in fractions of the crop's width
    and height. ``digest`` records which windows are mostly lit in
    ``self.flags``; ``getNum`` maps that pattern to a digit.
    """

    # A window counts as lit when more than this fraction of its pixels is 255.
    _FILL_THRESHOLD = 0.5
    # A crop narrower than this fraction of its height is taken to be a "1".
    _ONE_ASPECT_RATIO = 0.5

    # Lit-segment pattern (ascending segment indices) -> digit.
    _DIGITS = {
        (0, 2, 3, 4, 5, 6): 0,
        (5, 6): 1,
        (0, 1, 2, 4, 5): 2,
        (0, 1, 2, 5, 6): 3,
        (1, 3, 5, 6): 4,
        (0, 1, 2, 3, 6): 5,
        (0, 1, 2, 3, 4, 6): 6,
        (0, 5, 6): 7,
        (0, 1, 2, 3, 4, 5, 6): 8,
        (0, 1, 2, 3, 5, 6): 9,
    }

    def __init__(self):
        """Create the segment model with no segments flagged."""
        self.flags = []
        self.segments = [
            [[0, 1.0], [0, 0.1]],      # 0: top bar
            [[0, 1.0], [0.45, 0.55]],  # 1: middle bar
            [[0, 1.0], [0.9, 1.0]],    # 2: bottom bar
            [[0, 0.2], [0, 0.5]],      # 3: upper-left
            [[0, 0.2], [0.5, 1.0]],    # 4: lower-left
            [[0.8, 1.0], [0, 0.5]],    # 5: upper-right
            [[0.8, 1.0], [0.5, 1.0]],  # 6: lower-right
        ]

    def digest(self, number):
        """Inspect a digit crop and store the lit segment indices in ``self.flags``.

        Args:
            number: Array indexed as ``[row, column]`` whose lit pixels equal
                255 (presumably a single-channel threshold mask).
        """
        # reset flags
        self.flags = []

        # A "1" fills only the right-hand column of a full-width digit, so its
        # crop is narrow; recognise it from the aspect ratio alone.
        h, w = number.shape[:2]
        if w < self._ONE_ASPECT_RATIO * h:
            self.flags = [5, 6]
            return

        for index, ((x_lo, x_hi), (y_lo, y_hi)) in enumerate(self.segments):
            # convert fractional bounds to pixel coordinates
            left, right = int(x_lo * w), int(x_hi * w)
            top, bottom = int(y_lo * h), int(y_hi * h)
            area = (right - left) * (bottom - top)
            # Very small crops round a window down to zero pixels; such a
            # window cannot be lit, and dividing by its area would fail.
            if area <= 0:
                continue
            lit = np.count_nonzero(number[top:bottom, left:right] == 255)
            # Multiplying instead of dividing avoids integer-division surprises.
            if lit > self._FILL_THRESHOLD * area:
                self.flags.append(index)

    def getNum(self):
        """Return the digit matching ``self.flags``, or -1 if the pattern is unknown."""
        return self._DIGITS.get(tuple(self.flags), -1)

main.py

import cv2
import numpy as np
from segments import Segments

# load image; cv2.imread signals an unreadable file by returning None
img = cv2.imread("seg7.jpg")
if img is None:
    raise IOError("could not read image: seg7.jpg")

# crop to the region of interest (rows 300-800, columns 100-800)
img = img[300:800, 100:800, :]

# convert to LAB; the digits are isolated on the b channel below
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)

# show
cv2.imshow("orig", img)

# structuring element for the closing operation (dilate, then erode)
kernel = np.ones((5, 5), np.uint8)

# threshold params
low = 165
high = 200
iters = 3

# make copy
copy = b.copy()

# threshold: pixels with low <= b <= high become 255, everything else 0
thresh = cv2.inRange(copy, low, high)

# dilate; `_` keeps the loop from overwriting the `a` channel
for _ in range(iters):
    thresh = cv2.dilate(thresh, kernel)

# erode
for _ in range(iters):
    thresh = cv2.erode(thresh, kernel)

# show image
cv2.imshow("thresh", thresh)
cv2.imwrite("threshold.jpg", thresh)

# start processing
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
# (contours, hierarchy); the contour list is second from the end in both.
contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

# draw
for contour in contours:
    cv2.drawContours(img, [contour], 0, (0, 255, 0), 3)

# bounding box of each contour as [[left, top], [right, bottom]];
# both corners are pixels that belong to the contour (inclusive bounds)
bounds = []
for contour in contours:
    x, y, box_w, box_h = cv2.boundingRect(contour)
    bounds.append([[x, y], [x + box_w - 1, y + box_h - 1]])

# crop out each number
cuts = []
number = 0  # 1-based running count, used as the preview window name
for bound in bounds:
    tl, br = bound
    # br is the largest contour coordinate and slice ends are exclusive, so
    # add one to keep the last row and column of the digit.
    cut_img = thresh[tl[1]:br[1] + 1, tl[0]:br[0] + 1]
    cuts.append(cut_img)
    number += 1
    cv2.imshow(str(number), cut_img)

# font
font = cv2.FONT_HERSHEY_SIMPLEX

# create a segment model
model = Segments()
for index, cut in enumerate(cuts):
    # Recognise first so the file names below carry this crop's own digit
    # rather than a value left over from earlier code.
    model.digest(cut)
    digit = model.getNum()
    print(digit)

    # save and show the raw crop
    cv2.imwrite("{}_{}.jpg".format(index, digit), cut)
    cv2.imshow(str(index), cut)

    # draw the recognised digit over a colour copy of the crop and save again
    h, w = cut.shape[:2]
    drawn = np.zeros((h, w, 3), np.uint8)
    drawn[:, :, 0] = cut  # mask goes into channel 0 so the red label stands out
    drawn = cv2.putText(drawn, str(digit), (10, 30), font, 1, (0, 0, 255), 2, cv2.LINE_AA)
    cv2.imwrite("drawn{}_{}.jpg".format(index, digit), drawn)

# show
cv2.imshow("contours", img)
cv2.imwrite("contours.jpg", img)
cv2.waitKey(0)

我不能保证它总是有效,但只要稍加调整应该就能使用。如果图像不是正的,请记得先校正图像的旋转。该段模型假设数字基本上是直立的。