为什么 keras 模型不准确,不被识别?
Why is the keras model less accurate and not recognized?
我下载了 mnist 数据集 (jpg) 并使用 .hdf (.h5) 创建了一个模型文件。
使用.h5模型识别图片,但识别率低..
.
实际编译时精度偏低..
我是不是做错了什么?.
.
图片使用了灰度的28x28图片..
import os
import cv2
import numpy as np
import tensorflow as tf
from PIL import Image
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import random
FILENAME = 'model.h5'
WIDTH = 28
HEIGHT = 28
EPOCHES = 100
BATCH_SIZE = 32
def create_dataset(img_folder):
img_data_array=[]
class_name=[]
for path in os.listdir(img_folder):
if path == ".DS_Store":
continue
for file in os.listdir(os.path.join(img_folder, path)):
if file == ".DS_Store":
continue
image_path = os.path.join(img_folder, path, file)
image = cv2.imread( image_path, cv2.IMREAD_UNCHANGED)
image = cv2.resize(image, (HEIGHT, WIDTH),interpolation = cv2.INTER_AREA)
image = np.array(image)
image = image.astype('float32')
image /= 255
img_data_array.append(image)
class_name.append(path)
return img_data_array, class_name
img_data, class_name = create_dataset(r'/Users/animalman/Documents/test/grayscale/train')
test, test_class_name = create_dataset(r'/Users/animalman/Documents/test/grayscale/test')
target_dict = {k: v for v, k in enumerate(np.unique(class_name))}
target_val = [target_dict[class_name[i]] for i in range(len(class_name))]
test_dict = {k: v for v, k in enumerate(np.unique(test_class_name))}
test_val = [test_dict[test_class_name[i]] for i in range(len(test_class_name))]
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
# tensor
history = model.fit(x=tf.cast(np.array(img_data), tf.float64), y=tf.cast(list(map(int,target_val)),tf.int32), epochs=EPOCHES, batch_size=BATCH_SIZE, validation_split=0.33)
evaluate = model.evaluate(x=tf.cast(np.array(img_data), tf.float64), y=tf.cast(list(map(int,target_val)),tf.int32), batch_size=BATCH_SIZE)
print('Train:', evaluate)
test_evaluate = model.evaluate(x=tf.cast(np.array(test), tf.float64), y=tf.cast(list(map(int,test_val)),tf.int32), batch_size=BATCH_SIZE)
print('Test:', test_evaluate)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
test_loss, test_acc = model.evaluate(x_test, y_test)
print('mnist', test_acc)
model.save(FILENAME)
...
...
纪元 98/100
1257/1257 [==============================] - 3 秒 2 毫秒/步 - 损失:5.5190e-08 - 精度: 1.0000 - val_loss: 43.3440 - val_accuracy: 0.1135
时代 99/100
1257/1257 [==============================] - 3 秒 2 毫秒/步 - 损失:4.0746e-08 - 精度: 1.0000 - val_loss: 43.3764 - val_accuracy: 0.1136
时代 100/100
1257/1257 [==============================] - 3 秒 2 毫秒/步 - 损失:2.3033e-08 - 精度: 1.0000 - val_loss: 43.4628 - val_accuracy: 0.1136
..
..
火车:[14.343465805053711,0.7074833512306213]
313/313 [==============================] - 0s 579us/步 - 损失:14.7582 - 精度:0.6990
测试:[14.758186340332031, 0.6990000009536743]
313/313 [==============================] - 0s 850us/步 - 损失:3887.2236 - 精度:0.6991
mnist:0.6991000175476074
从这里开始
Epoch 100/100 1257/1257 [==============================] - 3s 2ms/step - loss: 2.3033e-08 - accuracy: 1.0000 - val_loss: 43.4628 - val_accuracy: 0.1136
你可以看到训练acc是1.0
,验证acc是0.1136
-> 您的模型 过度拟合 训练数据集。
我下载了 mnist 数据集 (jpg) 并使用 .hdf (.h5) 创建了一个模型文件。
使用.h5模型识别图片,但识别率低..
.
实际编译时精度偏低..
我是不是做错了什么?.
.
图片使用了灰度的28x28图片..
import os
import cv2
import numpy as np
import tensorflow as tf
from PIL import Image
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import random
FILENAME = 'model.h5'
WIDTH = 28
HEIGHT = 28
EPOCHES = 100
BATCH_SIZE = 32
def create_dataset(img_folder):
img_data_array=[]
class_name=[]
for path in os.listdir(img_folder):
if path == ".DS_Store":
continue
for file in os.listdir(os.path.join(img_folder, path)):
if file == ".DS_Store":
continue
image_path = os.path.join(img_folder, path, file)
image = cv2.imread( image_path, cv2.IMREAD_UNCHANGED)
image = cv2.resize(image, (HEIGHT, WIDTH),interpolation = cv2.INTER_AREA)
image = np.array(image)
image = image.astype('float32')
image /= 255
img_data_array.append(image)
class_name.append(path)
return img_data_array, class_name
img_data, class_name = create_dataset(r'/Users/animalman/Documents/test/grayscale/train')
test, test_class_name = create_dataset(r'/Users/animalman/Documents/test/grayscale/test')
target_dict = {k: v for v, k in enumerate(np.unique(class_name))}
target_val = [target_dict[class_name[i]] for i in range(len(class_name))]
test_dict = {k: v for v, k in enumerate(np.unique(test_class_name))}
test_val = [test_dict[test_class_name[i]] for i in range(len(test_class_name))]
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
# tensor
history = model.fit(x=tf.cast(np.array(img_data), tf.float64), y=tf.cast(list(map(int,target_val)),tf.int32), epochs=EPOCHES, batch_size=BATCH_SIZE, validation_split=0.33)
evaluate = model.evaluate(x=tf.cast(np.array(img_data), tf.float64), y=tf.cast(list(map(int,target_val)),tf.int32), batch_size=BATCH_SIZE)
print('Train:', evaluate)
test_evaluate = model.evaluate(x=tf.cast(np.array(test), tf.float64), y=tf.cast(list(map(int,test_val)),tf.int32), batch_size=BATCH_SIZE)
print('Test:', test_evaluate)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
test_loss, test_acc = model.evaluate(x_test, y_test)
print('mnist', test_acc)
model.save(FILENAME)
...
...
纪元 98/100
1257/1257 [==============================] - 3 秒 2 毫秒/步 - 损失:5.5190e-08 - 精度: 1.0000 - val_loss: 43.3440 - val_accuracy: 0.1135
时代 99/100
1257/1257 [==============================] - 3 秒 2 毫秒/步 - 损失:4.0746e-08 - 精度: 1.0000 - val_loss: 43.3764 - val_accuracy: 0.1136
时代 100/100
1257/1257 [==============================] - 3 秒 2 毫秒/步 - 损失:2.3033e-08 - 精度: 1.0000 - val_loss: 43.4628 - val_accuracy: 0.1136
..
..
火车:[14.343465805053711,0.7074833512306213]
313/313 [==============================] - 0s 579us/步 - 损失:14.7582 - 精度:0.6990
测试:[14.758186340332031, 0.6990000009536743]
313/313 [==============================] - 0s 850us/步 - 损失:3887.2236 - 精度:0.6991
mnist:0.6991000175476074
从这里开始
Epoch 100/100 1257/1257 [==============================] - 3s 2ms/step - loss: 2.3033e-08 - accuracy: 1.0000 - val_loss: 43.4628 - val_accuracy: 0.1136
你可以看到训练acc是1.0
,验证acc是0.1136
-> 您的模型 过度拟合 训练数据集。