卷积神经网络模型预测没有猫

Convolutional Neural Network model predicts no cats

我训练了我的第一个 CNN 模型。我从 Kaggle dataset 中获取了前 100 张猫的图像和前 100 张狗的图像作为我的自定义数据集。

训练模型后,我尝试将相同的图像反馈给模型以查看预测结果。结果,我在所有图像上的得分都在 0.5 到 0.6 之间。虽然我认为猫应该 <0.5,狗应该 >0.5。这是我的模型架构、训练过程或我的数据集太小的问题吗?为什么没有图像低于 0.5?

这是我的代码:

首先我生成要处理的.csv文件:

import pandas as pd
import os
import torch

device = ("cuda" if torch.cuda.is_available() else "cpu")

train_df = pd.DataFrame(columns=["img_name","label"])
train_df["img_name"] = os.listdir("train/")
for idx, i in enumerate(os.listdir("train/")):
    if "cat" in i:
        train_df["label"][idx] = 0
    if "dog" in i:
        train_df["label"][idx] = 1

train_df.to_csv (r'train_csv.csv', index = False, header=True)

然后我准备数据集:

from torch.utils.data import Dataset
import pandas as pd
import os
from PIL import Image
import torch

class CatsAndDogsDataset(Dataset):
    def __init__(self, root_dir, annotation_file, transform=None):
        self.root_dir = root_dir
        self.annotations = pd.read_csv(annotation_file)
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_id = self.annotations.iloc[index, 0]
        img = Image.open(os.path.join(self.root_dir, img_id)).convert("RGB")
        y_label = torch.tensor(float(self.annotations.iloc[index, 1]))

        if self.transform is not None:
            img = self.transform(img)

        return (img, y_label)

这是我的模型:

import torch.nn as nn
import torchvision.models as models

class CNN(nn.Module):
    def __init__(self, train_CNN=False, num_classes=1):
        super(CNN, self).__init__()
        self.train_CNN = train_CNN
        self.inception = models.inception_v3(pretrained=True, aux_logits=False)
        self.inception.fc = nn.Linear(self.inception.fc.in_features, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        features = self.inception(images)
        return self.sigmoid(self.dropout(self.relu(features))).squeeze(1)

这是我的超参数、转换和数据加载器:

from torch.utils.data import DataLoader
import torchvision.transforms as transforms

num_epochs = 10
learning_rate = 0.00001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 0
transform = transforms.Compose(
        [
            transforms.Resize((356, 356)),
            transforms.CenterCrop((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ]
    )
dataset = CatsAndDogsDataset("train","train_csv.csv",transform=transform)
train_size = int(0.8 * len(dataset))
validation_size = len(dataset) - train_size
train_set, validation_set = torch.utils.data.random_split(dataset, [train_size, validation_size])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers,pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers, pin_memory=pin_memory)

model = CNN().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for name, param in model.inception.named_parameters():
    if "fc.weight" in name or "fc.bias" in name:
        param.requires_grad = True
    else:
        param.requires_grad = train_CNN

和准确性检查:

def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
    model.train()
    return f"{float(num_correct)/float(num_samples)*100:.2f}"

这是我的训练函数:

from tqdm import tqdm

def train():
    model.train()
    for epoch in range(num_epochs):
        loop = tqdm(train_loader, total = len(train_loader), leave = True)
        for imgs, labels in loop:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
            loop.set_postfix(loss = loss.item(), val_acc = check_accuracy(validation_loader, model))
if __name__ == "__main__":
    train()
Epoch [0/10]: 100%|██████████| 6/6 [12:00<00:00, 120.10s/it, loss=0.652, val_acc=39.02]
Epoch [1/10]: 100%|██████████| 6/6 [11:51<00:00, 118.61s/it, loss=0.497, val_acc=39.02]
Epoch [2/10]: 100%|██████████| 6/6 [11:27<00:00, 114.51s/it, loss=0.693, val_acc=39.02]
Epoch [3/10]: 100%|██████████| 6/6 [11:04<00:00, 110.77s/it, loss=0.531, val_acc=39.02]
Epoch [4/10]: 100%|██████████| 6/6 [10:58<00:00, 109.68s/it, loss=0.693, val_acc=39.02]
Epoch [5/10]: 100%|██████████| 6/6 [12:03<00:00, 120.51s/it, loss=0.803, val_acc=39.02]
Epoch [6/10]: 100%|██████████| 6/6 [11:33<00:00, 115.62s/it, loss=0.693, val_acc=39.02]
Epoch [7/10]: 100%|██████████| 6/6 [11:27<00:00, 114.56s/it, loss=0.675, val_acc=39.02]
Epoch [8/10]: 100%|██████████| 6/6 [11:42<00:00, 117.10s/it, loss=0.806, val_acc=39.02]
Epoch [9/10]: 100%|██████████| 6/6 [12:15<00:00, 122.58s/it, loss=0.768, val_acc=39.02]

然后我遍历模型检查每张图像的预测(dataset 变量可用,因为它在同一个 Jupyter Notebook 中):

import numpy as np

with torch.no_grad():
  for index in range(len(dataset)):
    item = dataset[index]
    image_tensor = item[0]
    true_target = item[1]
    img_np = np.array(image_tensor)
    img_normalized = img_np.transpose(1, 2, 0)
    image = torch.unsqueeze(image_tensor, 0)
    prediction = model(image)
    predicted_class = prediction[0]
    print("class: " + str(true_target.item()) + " score: " + str(predicted_class.item()))

输出:

class: 0.0 score: 0.547210156917572
class: 0.0 score: 0.5
class: 0.0 score: 0.5348594188690186
class: 0.0 score: 0.5336627960205078
class: 0.0 score: 0.5178861618041992
class: 0.0 score: 0.5692692995071411
class: 0.0 score: 0.5
class: 0.0 score: 0.5381814241409302
class: 0.0 score: 0.54604572057724
class: 0.0 score: 0.5157472491264343
class: 0.0 score: 0.5257323980331421
class: 0.0 score: 0.5137990713119507
class: 0.0 score: 0.5247158408164978
class: 0.0 score: 0.5320644378662109
class: 0.0 score: 0.5775637626647949
class: 0.0 score: 0.528205156326294
class: 0.0 score: 0.5457945466041565
class: 0.0 score: 0.5301501154899597
class: 0.0 score: 0.5102765560150146
class: 0.0 score: 0.5069065690040588
class: 0.0 score: 0.519408106803894
class: 0.0 score: 0.5414850115776062
class: 0.0 score: 0.5041879415512085
class: 0.0 score: 0.5055546760559082
show more (open the raw output data in a text editor) ...
class: 1.0 score: 0.5
class: 1.0 score: 0.5
class: 1.0 score: 0.5166758894920349
class: 1.0 score: 0.5343206524848938
class: 1.0 score: 0.5716230869293213

所以没有猫被预测到

你能把你的模型拱门改成这个吗(只需删除 dropoutrelu

import torch.nn as nn
import torchvision.models as models

class CNN(nn.Module):
    def __init__(self, train_CNN=False, num_classes=1):
        super(CNN, self).__init__()
        self.train_CNN = train_CNN
        self.inception = models.inception_v3(pretrained=True, aux_logits=False)
        self.inception.fc = nn.Linear(self.inception.fc.in_features, num_classes)
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        features = self.inception(images)
        return self.sigmoid(features).squeeze(1)

并且在进行推理之前尝试使用 model.eval(),因为你已经使用了 dropout