How to fix underfitting using a CNN / why can't my code identify images correctly?

I'm new to neural networks and tried to write my own code to classify images. I've been working with the Concrete Crack Images for Classification dataset (https://data.mendeley.com/datasets/5y9wdsg2zt/2) to classify whether an image shows a crack or no defect. I randomly drew 2,000 images from the dataset: 1,400 for my training set and 300 each for my validation and test sets. Half of the images are positive/show a crack, the other half are negative/defect-free.

For classification I use VGG16 pretrained on ImageNet. Below you can see my complete code, which I put together from various tutorials that tackle similar tasks.

Unfortunately, it fails to identify even a single crack image and classifies everything as negative/defect-free. I've tried different batch sizes, numbers of epochs, and numbers of images, and I've also trained without the pretrained weights, but nothing seems to work and I have no idea why. Any help is much appreciated, thanks in advance!

Feel free to ask if anything is unclear.

import tensorflow as tf
import os
import numpy as np
import keras
from keras.preprocessing import image
from PIL import Image
import os.path, sys
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

import scipy as sp

from tensorflow.keras.applications import vgg16
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img, ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras import optimizers

import requests
from io import BytesIO
import random
import pickle
import itertools

from sklearn.metrics import classification_report, confusion_matrix

# load data
PATH = '/Volumes/ConcreteCrackImages'

train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')
test_dir = os.path.join(PATH, 'test')

train_negative_dir = os.path.join(train_dir, 'Negative')  # directory with our training negative pictures
train_crack_dir = os.path.join(train_dir, 'Positive')  # directory with our training crack pictures
validation_negative_dir = os.path.join(validation_dir, 'Negative')  # directory with our validation negative pictures
validation_crack_dir = os.path.join(validation_dir, 'Positive')  # directory with our validation crack pictures
test_negative_dir = os.path.join(test_dir, 'Negative')  # directory with our test negative pictures
test_crack_dir = os.path.join(test_dir, 'Positive')  # directory with our test crack pictures

# understand the data

num_negative_tr = len(os.listdir(train_negative_dir))
num_crack_tr = len(os.listdir(train_crack_dir))

num_negative_val = len(os.listdir(validation_negative_dir))
num_crack_val = len(os.listdir(validation_crack_dir))

num_negative_test = len(os.listdir(test_negative_dir))
num_crack_test = len(os.listdir(test_crack_dir))

total_train = num_negative_tr + num_crack_tr
total_val = num_negative_val + num_crack_val
total_test = num_negative_test + num_crack_test

print('total training negative images:', num_negative_tr)
print('total training crack images:', num_crack_tr)

print('total validation negative images:', num_negative_val)
print('total validation crack images:', num_crack_val)

print('total test negative images:', num_negative_test)
print('total test crack images:', num_crack_test)
print("--")
print("Total training images:", total_train)
print("Total validation images:", total_val)
print("Total test images:", total_test)

# variables for pre-processing
batch_size = 32
epochs = 40
IMG_HEIGHT = 224
IMG_WIDTH = 224

# data preparation

train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data
validation_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our validation data
test_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our test data

train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='binary')

val_data_gen = validation_image_generator.flow_from_directory(batch_size=batch_size,
                                                              directory=validation_dir,
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='binary')

test_data_gen = test_image_generator.flow_from_directory(batch_size=batch_size,
                                                              directory=test_dir,
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='binary',
                                                              shuffle=False)  # keep file order so predictions align with test_data_gen.classes

# visualize training images

sample_training_images, _ = next(train_data_gen)

# This function will plot images in the form of a grid with 1 row and 5 columns where images are placed in each column.
# =============================================================================

def plotImages(images_arr):
    fig, axes = plt.subplots(1, 5, figsize=(20,20))
    axes = axes.flatten()
    for img, ax in zip( images_arr, axes):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

plotImages(sample_training_images[:5])

# =============================================================================
# create the model/ import vgg16

vgg_conv = vgg16.VGG16(weights='imagenet', include_top=False, input_shape = (224, 224, 3))

# Freeze all layers except the last 8
for layer in vgg_conv.layers[:-8]:
    layer.trainable = False

# Check the trainable status of the individual layers
for layer in vgg_conv.layers:
    print(layer, layer.trainable)

### MODIFY VGG STRUCTURE ###

x = vgg_conv.output
x = GlobalAveragePooling2D()(x)
x = Dense(1, activation="sigmoid")(x)

model = Model(vgg_conv.input, x)
model.compile(loss="binary_crossentropy", optimizer=optimizers.SGD(learning_rate=0.00001, momentum=0.9), metrics=["accuracy"])

model.summary()

# train the model

history = model.fit(
     train_data_gen,
     steps_per_epoch=total_train // batch_size,
     epochs=epochs,
     validation_data=val_data_gen,
     validation_steps=total_val // batch_size
)


# visualize training results

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss=history.history['loss']
val_loss=history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

# Evaluate the model on the test data using `evaluate`
print('\nEvaluate on test data')
results = model.evaluate(test_data_gen,
                         verbose = 1)
print('test loss, test acc:', results)

#Confusion Matrix and Classification Report
Y_pred = model.predict(test_data_gen)
y_pred = (Y_pred > 0.5).astype(int).ravel()  # threshold the single sigmoid output; np.argmax over one column would always return 0
print('Confusion Matrix')
print(confusion_matrix(test_data_gen.classes, y_pred))
print('Classification Report')
target_names = ['Negative', 'Crack']
print(classification_report(test_data_gen.classes, y_pred, target_names=target_names))

There are a couple of things you can check.

  1. Since you are using VGG with an ImageDataGenerator, you have to make sure the generator performs the same preprocessing the pretrained VGG model expects. VGG was trained with imagenet_utils.preprocess_input with the mode set to 'caffe'. There are three modes in total (caffe, tf, torch), and different models expect different preprocessing; see the first sketch after this list.

  2. When you instantiate the VGG model, you set include_top to False, then take VGG's output, apply a global pooling, and add a single Dense layer for the output. If you dig into the source code of the VGG implementation, the top is not just the softmax layer; there are FC layers as well. The FC layers are what build abstractions on top of the extracted VGG features. Without enough FC layers, your model is not complex enough to learn the feature space well; see the second sketch after this list.

Try at least these two and see whether they help.
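
A minimal sketch of point 1, assuming the rest of the question's setup stays the same: swap the rescale=1./255 generators for VGG16's own preprocess_input, which applies the 'caffe'-mode preprocessing (RGB-to-BGR channel flip plus ImageNet mean subtraction) that the pretrained weights expect.

from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Let each generator apply the same preprocessing the pretrained weights saw,
# instead of a plain rescale=1./255
train_image_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
validation_image_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
test_image_generator = ImageDataGenerator(preprocessing_function=preprocess_input)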
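
And a sketch of point 2, reusing the vgg_conv base from the question: insert FC layers between the pooling layer and the sigmoid output, roughly mirroring the FC part of the original VGG16 top. The 512-unit sizes and the dropout rate are illustrative choices, not tuned values.

from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model

x = vgg_conv.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)  # FC layers build abstractions on the extracted VGG features
x = Dropout(0.5)(x)                   # illustrative regularization between the FC layers
x = Dense(512, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)

model = Model(vgg_conv.input, x)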