Python 中的 OCR 机器学习 - 使用 keras 训练模型
OCR Machine Learning in Python -Training model with keras
这是我正在尝试开发的代码,用于创建 OCR 程序,以便用户可以手写数字,而程序 returns 是高精度的实际值。
from keras.models import load_model
from tkinter import *
import tkinter as tk
import appscript
#import win32gui
from PIL import ImageGrab, Image
import numpy as np
model = load_model('mnist.h5')
def predict_digit(img):
#resize image to 28x28 pixels
img = img.resize((28,28))
#convert rgb to grayscale
img = img.convert('L')
img = np.array(img)
#reshaping to support our model input and normalizing
img = img.reshape(1,28,28,1)
img = img/255.0
#predicting the class
res = model.predict([img])[0]
return np.argmax(res), max(res)
class App(tk.Tk):
def __init__(self):
tk.Tk.__init__(self)
self.x = self.y = 0
# Creating elements
self.canvas = tk.Canvas(self, width=300, height=300, bg = "white", cursor="cross")
self.label = tk.Label(self, text="Draw..", font=("Helvetica", 48))
self.classify_btn = tk.Button(self, text = "Recognise", command = self.classify_handwriting)
self.button_clear = tk.Button(self, text = "Clear", command = self.clear_all)
# Grid structure
self.canvas.grid(row=0, column=0, pady=2, sticky=W, )
self.label.grid(row=0, column=1,pady=2, padx=2)
self.classify_btn.grid(row=1, column=1, pady=2, padx=2)
self.button_clear.grid(row=1, column=0, pady=2)
#self.canvas.bind("<Motion>", self.start_pos)
self.canvas.bind("<B1-Motion>", self.draw_lines)
def clear_all(self):
self.canvas.delete("all")
def classify_handwriting(self):
#HWND = self.canvas.winfo_id() # get the handle of the canvas
#rect=self.canvas.coords(HWND)
#rect = win32gui.GetWindowRect(HWND) # get the coordinate of the canvas
#a,b,c,d = rect
#a,b,c,d=self.canvas.winfo_rootx(), self.canvas.winfo_rooty(), self.canvas.winfo_width(),self.canvas.winfo_height()
x, y = (self.canvas.winfo_rootx(), self.canvas.winfo_rooty())
width, height = (self.canvas.winfo_width(), self.canvas.winfo_height())
a, b, c, d = (x, y, x+width, y+height)
rect=(a+4,b+4,c-4,d-4)
im = ImageGrab.grab(rect)
digit, acc = predict_digit(im)
self.label.configure(text= str(digit)+', '+ str(int(acc*100))+'%')
def draw_lines(self, event):
self.x = event.x
self.y = event.y
r=8
HWND=self.canvas.create_oval(self.x-r, self.y-r, self.x + r, self.y + r, fill='black')
app = App()
mainloop()
这段代码的问题在于,无论我重写多少,我总是绘制它 returns 0 。
如果有 python/Keras/Tkinter 知识的人可以帮助我找到问题并解决它。
模型的源代码 here。
注意:这段代码没有错误
这是程序的截图 window
程序中使用的神经网络代码不够准确,无法做出正确的预测。训练精度如下:
代码的实际逻辑没有错。改进 NN 代码将是解决此问题的方法。
这是我正在尝试开发的代码,用于创建 OCR 程序,以便用户可以手写数字,而程序 returns 是高精度的实际值。
from keras.models import load_model
from tkinter import *
import tkinter as tk
import appscript
#import win32gui
from PIL import ImageGrab, Image
import numpy as np
model = load_model('mnist.h5')
def predict_digit(img):
#resize image to 28x28 pixels
img = img.resize((28,28))
#convert rgb to grayscale
img = img.convert('L')
img = np.array(img)
#reshaping to support our model input and normalizing
img = img.reshape(1,28,28,1)
img = img/255.0
#predicting the class
res = model.predict([img])[0]
return np.argmax(res), max(res)
class App(tk.Tk):
def __init__(self):
tk.Tk.__init__(self)
self.x = self.y = 0
# Creating elements
self.canvas = tk.Canvas(self, width=300, height=300, bg = "white", cursor="cross")
self.label = tk.Label(self, text="Draw..", font=("Helvetica", 48))
self.classify_btn = tk.Button(self, text = "Recognise", command = self.classify_handwriting)
self.button_clear = tk.Button(self, text = "Clear", command = self.clear_all)
# Grid structure
self.canvas.grid(row=0, column=0, pady=2, sticky=W, )
self.label.grid(row=0, column=1,pady=2, padx=2)
self.classify_btn.grid(row=1, column=1, pady=2, padx=2)
self.button_clear.grid(row=1, column=0, pady=2)
#self.canvas.bind("<Motion>", self.start_pos)
self.canvas.bind("<B1-Motion>", self.draw_lines)
def clear_all(self):
self.canvas.delete("all")
def classify_handwriting(self):
#HWND = self.canvas.winfo_id() # get the handle of the canvas
#rect=self.canvas.coords(HWND)
#rect = win32gui.GetWindowRect(HWND) # get the coordinate of the canvas
#a,b,c,d = rect
#a,b,c,d=self.canvas.winfo_rootx(), self.canvas.winfo_rooty(), self.canvas.winfo_width(),self.canvas.winfo_height()
x, y = (self.canvas.winfo_rootx(), self.canvas.winfo_rooty())
width, height = (self.canvas.winfo_width(), self.canvas.winfo_height())
a, b, c, d = (x, y, x+width, y+height)
rect=(a+4,b+4,c-4,d-4)
im = ImageGrab.grab(rect)
digit, acc = predict_digit(im)
self.label.configure(text= str(digit)+', '+ str(int(acc*100))+'%')
def draw_lines(self, event):
self.x = event.x
self.y = event.y
r=8
HWND=self.canvas.create_oval(self.x-r, self.y-r, self.x + r, self.y + r, fill='black')
app = App()
mainloop()
这段代码的问题在于,无论我重写多少,我总是绘制它 returns 0 。
如果有 python/Keras/Tkinter 知识的人可以帮助我找到问题并解决它。
模型的源代码 here。
注意:这段代码没有错误
这是程序的截图 window
程序中使用的神经网络代码不够准确,无法做出正确的预测。训练精度如下:
代码的实际逻辑没有错。改进 NN 代码将是解决此问题的方法。