Pytorch NN 训练问题:NN 的损失不减少
PyTorch NN training issue: loss of NN does not decrease
我想将随机的 Instagram 图片分类为 "image has a dog" 或 "image has not a dog"。
为了训练我的 NN 对狗进行分类,我想使用 Stanford Dogs 数据集,所以我有大约 20.000 个不同品种的不同狗的训练图像。
但是在训练我的 NN 时损失并没有减少,我检查了不同的学习率以及有或没有 dropout 层。
有人能给一些提示,或者看出下面代码中的错误吗?
import torch
import torchvision
from torchvision import transforms
from PIL import Image
from os import listdir
import os
import random
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
TRAINDATAPATH = 'C:/Users/.../Desktop/train/'
TESTDATAPATH = 'C:/Users/.../Desktop/#apfel/'

# Alternative normalisation that was tried and disabled (these look like the
# usual ImageNet channel statistics):
#   transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
normalize = transforms.Normalize(
    mean=[0.5, 0.5, 0.5],
    std=[0.5, 0.5, 0.5]
)
# NOTE: this rebinds the name `transforms` from the torchvision module to the
# composed pipeline. test() calls it as `transforms(img)`, so the name is kept.
transforms = transforms.Compose([transforms.Resize(256),
                                 transforms.CenterCrop(256),
                                 transforms.ToTensor(),
                                 normalize])
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# train_data ends up as a list of (image_batch, target_batch) pairs where
# image_batch is a stacked tensor and target_batch is a list of
# [isObj, isNotObj] pairs, one per image.
train_data_list = []
target_list = []
train_data = []
batch_size = 1

# List the directory once; the listing is loop-invariant.
files = listdir(TRAINDATAPATH)
# Ceiling division: a trailing partial batch is also emitted (see below).
total_batches = -(-len(files) // batch_size)
# Visit every file exactly once in random order.
random.shuffle(files)
for f in files:
    try:
        # The context manager closes the file handle after decoding.
        # convert('RGB') lets grayscale/RGBA/palette images pass through the
        # 3-channel Normalize instead of being dropped.
        with Image.open(TRAINDATAPATH + f) as img:
            img_tensor = transforms(img.convert('RGB'))  # (3,256,256)
    except Exception as exc:
        # Best effort: unreadable files are skipped, but the real error is shown.
        print("Error occured but ignored")
        print(str(exc))
        continue
    train_data_list.append(img_tensor)
    # Label comes from the file name: anything containing 'obj' is the positive class.
    isObj = 1 if 'obj' in f else 0
    isNotObj = 0 if 'obj' in f else 1
    target = [isObj, isNotObj]
    target_list.append(target)
    if len(train_data_list) >= batch_size:
        train_data.append((torch.stack(train_data_list), target_list))
        train_data_list = []
        target_list = []
        print('Loaded batch ', len(train_data), 'of ', total_batches)
        print('Percentage Done: ', 100*len(train_data)/total_batches, '%')

# Emit whatever is left when the file count is not a multiple of batch_size.
if train_data_list:
    train_data.append((torch.stack(train_data_list), target_list))
    train_data_list = []
    target_list = []
class Netz(nn.Module):
    """Small CNN with four conv/pool stages and two fully connected layers.

    Each stage is a 5x5 convolution (no padding), a 2x2 max-pool and a ReLU.
    For a 3x256x256 input the spatial size shrinks 256 -> 126 -> 61 -> 28 -> 12,
    so the last stage yields 24 * 12 * 12 = 3456 features, which is the input
    width of ``fc1``. The network returns two sigmoid activations per sample.
    """

    def __init__(self):
        super(Netz, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        self.conv3 = nn.Conv2d(12, 18, kernel_size=5)
        self.conv4 = nn.Conv2d(18, 24, kernel_size=5)
        self.fc1 = nn.Linear(3456, 1000)
        self.fc2 = nn.Linear(1000, 2)

    def forward(self, x):
        """Return sigmoid outputs of shape (batch, 2) for a (batch, 3, 256, 256) input.

        Raises:
            RuntimeError: if the input's spatial size does not produce 3456
                features per sample.
        """
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.relu(F.max_pool2d(conv(x), 2))
        # Flatten per sample. The earlier ``view(-1, 3456)`` could silently
        # merge several wrongly-sized samples into one row; keeping the batch
        # dimension fixed makes a size mismatch fail in fc1 instead.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(x))
# Build the network and move its parameters to the GPU when one is available
# (Module.to returns the module itself, so the calls can be chained).
model = Netz().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
# NOTE(review): lr=10 is ten times Adadelta's documented default of 1.0 and is
# a likely reason for the loss not decreasing; left as posted.
optimizer = optim.Adadelta(params=model.parameters(), lr=10)
def train(epoch):
    """Run one optimisation pass over the module-level ``train_data``.

    For every (data, target) pair the batch is moved to the compute device,
    a binary-cross-entropy loss is back-propagated and the module-level
    ``optimizer`` takes one step. The loss of each batch is printed.

    Args:
        epoch: epoch number, used only in the printed progress line.
    """
    model.train()
    # Loop-invariant: resolve the device and the loss function once.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = F.binary_cross_entropy
    for data, target in train_data:
        data = data.to(run_device)
        # Targets are stored as nested Python lists; BCE needs a float tensor.
        target = torch.Tensor(target).to(run_device)
        # NOTE(review): Variable has been a no-op wrapper since PyTorch 0.4;
        # kept so the posted code's behaviour is unchanged.
        data = Variable(data)
        target = Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # NOTE(review): the pasted code lost its indentation; this assumes the
        # progress line is printed once per batch.
        print('Train Epoch: '+ str(epoch) + '\tLoss: ' + str(loss.item()) )
def test():
    """Classify one randomly chosen image from TESTDATAPATH and print the result.

    Prints the index of the larger of the two network outputs. The training
    targets are built as ``[isObj, isNotObj]``, so 0 means the first output
    (the 'obj' class) won and 1 means the second output won.
    """
    model.eval()
    files = listdir(TESTDATAPATH)
    f = random.choice(files)
    # Close the file after decoding; convert('RGB') keeps non-RGB images from
    # failing in the 3-channel Normalize step.
    with Image.open(TESTDATAPATH + f) as img:
        img_eval_tensor = transforms(img.convert('RGB'))
    # Add the batch dimension: (3, H, W) -> (1, 3, H, W).
    img_eval_tensor.unsqueeze_(0)
    data = Variable(img_eval_tensor.to(torch.device("cuda" if torch.cuda.is_available() else "cpu")) )
    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        out = model(data)
    # Reduce over the class dimension (dim=1). Reducing over dim=0, the batch
    # dimension of size 1, always yields index 0 regardless of the outputs.
    prediction = out.argmax(dim=1).item()
    print(prediction)
# Train for three epochs (epoch numbers 1, 2, 3).
for epoch in range(1, 4):
    train(epoch)

# NOTE(review): the pasted code lost its indentation; this assumes the 100
# spot checks run once after training rather than after every epoch.
for _ in range(100):
    test()
在 TRAINDATAPATH 中有数千张文件名为 "obj_XXX.jpg" 的狗图像,以及一些文件名中不含 "obj" 的不带狗的图像。
在 TESTDATAPATH 中只是随机图像,一些有狗,一些没有。
NN 将它们全部分类为 "not including dogs" 或“0”,这是不正确的。
感谢您的帮助!
您做的是二分类,但却使用了两个类别:
isObj = 1 if 'obj' in f else 0
isNotObj = 0 if 'obj' in f else 1
target = [isObj, isNotObj]
在二分类的情况下,应该只有单个类别,其中 1 表示是狗,0 表示不是。你已经这样做了,只是做了两次。您可以完全删除 isNotObj,只保留 isObj。
您需要相应地调整模型,使其仅预测 isObj,因此 fc2 应该只有 1 个输出:
self.fc2 = nn.Linear(1000, 1)
在测试阶段需要根据这单个输出进行预测,它可以看作是"是狗"的概率。然后设置一个阈值,超过该阈值就认为模型有足够的信心判断它确实是一只狗。为了保持平衡,阈值取 0.5,高于该值的都算作狗,低于该值的都不算。这可以通过 torch.round 轻松实现:
# Size: [batch_size, 1]
out = model(data)
predictions = torch.round(out)
# Get rid of the singular dimension
# To get size: [batch_size]
predictions = predictions.squeeze(1)
除此之外,10的学习率已经是天文数字了,学习率大于1就无法收敛。更合适的学习率在 0.01 或 0.001 左右。
另外,由于您是 PyTorch 的新手:请不要使用 Variable,它在 2 年前发布的 PyTorch 0.4.0 中已被弃用,其所有功能都已合并到张量(Tensor)中。
我想将随机的 Instagram 图片分类为 "image has a dog" 或 "image has not a dog"。 为了训练我的 NN 对狗进行分类,我想使用 Stanford Dogs 数据集,所以我有大约 20.000 个不同品种的不同狗的训练图像。
但是在训练我的 NN 时损失并没有减少,我检查了不同的学习率以及有或没有 dropout 层。
有人能给一些提示,或者看出下面代码中的错误吗?
import torch
import torchvision
from torchvision import transforms
from PIL import Image
from os import listdir
import os
import random
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
TRAINDATAPATH = 'C:/Users/.../Desktop/train/'
TESTDATAPATH = 'C:/Users/.../Desktop/#apfel/'

# Alternative normalisation that was tried and disabled (these look like the
# usual ImageNet channel statistics):
#   transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
normalize = transforms.Normalize(
    mean=[0.5, 0.5, 0.5],
    std=[0.5, 0.5, 0.5]
)
# NOTE: this rebinds the name `transforms` from the torchvision module to the
# composed pipeline. test() calls it as `transforms(img)`, so the name is kept.
transforms = transforms.Compose([transforms.Resize(256),
                                 transforms.CenterCrop(256),
                                 transforms.ToTensor(),
                                 normalize])
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# train_data ends up as a list of (image_batch, target_batch) pairs where
# image_batch is a stacked tensor and target_batch is a list of
# [isObj, isNotObj] pairs, one per image.
train_data_list = []
target_list = []
train_data = []
batch_size = 1

# List the directory once; the listing is loop-invariant.
files = listdir(TRAINDATAPATH)
# Ceiling division: a trailing partial batch is also emitted (see below).
total_batches = -(-len(files) // batch_size)
# Visit every file exactly once in random order.
random.shuffle(files)
for f in files:
    try:
        # The context manager closes the file handle after decoding.
        # convert('RGB') lets grayscale/RGBA/palette images pass through the
        # 3-channel Normalize instead of being dropped.
        with Image.open(TRAINDATAPATH + f) as img:
            img_tensor = transforms(img.convert('RGB'))  # (3,256,256)
    except Exception as exc:
        # Best effort: unreadable files are skipped, but the real error is shown.
        print("Error occured but ignored")
        print(str(exc))
        continue
    train_data_list.append(img_tensor)
    # Label comes from the file name: anything containing 'obj' is the positive class.
    isObj = 1 if 'obj' in f else 0
    isNotObj = 0 if 'obj' in f else 1
    target = [isObj, isNotObj]
    target_list.append(target)
    if len(train_data_list) >= batch_size:
        train_data.append((torch.stack(train_data_list), target_list))
        train_data_list = []
        target_list = []
        print('Loaded batch ', len(train_data), 'of ', total_batches)
        print('Percentage Done: ', 100*len(train_data)/total_batches, '%')

# Emit whatever is left when the file count is not a multiple of batch_size.
if train_data_list:
    train_data.append((torch.stack(train_data_list), target_list))
    train_data_list = []
    target_list = []
class Netz(nn.Module):
    """Small CNN with four conv/pool stages and two fully connected layers.

    Each stage is a 5x5 convolution (no padding), a 2x2 max-pool and a ReLU.
    For a 3x256x256 input the spatial size shrinks 256 -> 126 -> 61 -> 28 -> 12,
    so the last stage yields 24 * 12 * 12 = 3456 features, which is the input
    width of ``fc1``. The network returns two sigmoid activations per sample.
    """

    def __init__(self):
        super(Netz, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        self.conv3 = nn.Conv2d(12, 18, kernel_size=5)
        self.conv4 = nn.Conv2d(18, 24, kernel_size=5)
        self.fc1 = nn.Linear(3456, 1000)
        self.fc2 = nn.Linear(1000, 2)

    def forward(self, x):
        """Return sigmoid outputs of shape (batch, 2) for a (batch, 3, 256, 256) input.

        Raises:
            RuntimeError: if the input's spatial size does not produce 3456
                features per sample.
        """
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.relu(F.max_pool2d(conv(x), 2))
        # Flatten per sample. The earlier ``view(-1, 3456)`` could silently
        # merge several wrongly-sized samples into one row; keeping the batch
        # dimension fixed makes a size mismatch fail in fc1 instead.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(x))
# Build the network and move its parameters to the GPU when one is available
# (Module.to returns the module itself, so the calls can be chained).
model = Netz().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
# NOTE(review): lr=10 is ten times Adadelta's documented default of 1.0 and is
# a likely reason for the loss not decreasing; left as posted.
optimizer = optim.Adadelta(params=model.parameters(), lr=10)
def train(epoch):
    """Run one optimisation pass over the module-level ``train_data``.

    For every (data, target) pair the batch is moved to the compute device,
    a binary-cross-entropy loss is back-propagated and the module-level
    ``optimizer`` takes one step. The loss of each batch is printed.

    Args:
        epoch: epoch number, used only in the printed progress line.
    """
    model.train()
    # Loop-invariant: resolve the device and the loss function once.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = F.binary_cross_entropy
    for data, target in train_data:
        data = data.to(run_device)
        # Targets are stored as nested Python lists; BCE needs a float tensor.
        target = torch.Tensor(target).to(run_device)
        # NOTE(review): Variable has been a no-op wrapper since PyTorch 0.4;
        # kept so the posted code's behaviour is unchanged.
        data = Variable(data)
        target = Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # NOTE(review): the pasted code lost its indentation; this assumes the
        # progress line is printed once per batch.
        print('Train Epoch: '+ str(epoch) + '\tLoss: ' + str(loss.item()) )
def test():
    """Classify one randomly chosen image from TESTDATAPATH and print the result.

    Prints the index of the larger of the two network outputs. The training
    targets are built as ``[isObj, isNotObj]``, so 0 means the first output
    (the 'obj' class) won and 1 means the second output won.
    """
    model.eval()
    files = listdir(TESTDATAPATH)
    f = random.choice(files)
    # Close the file after decoding; convert('RGB') keeps non-RGB images from
    # failing in the 3-channel Normalize step.
    with Image.open(TESTDATAPATH + f) as img:
        img_eval_tensor = transforms(img.convert('RGB'))
    # Add the batch dimension: (3, H, W) -> (1, 3, H, W).
    img_eval_tensor.unsqueeze_(0)
    data = Variable(img_eval_tensor.to(torch.device("cuda" if torch.cuda.is_available() else "cpu")) )
    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        out = model(data)
    # Reduce over the class dimension (dim=1). Reducing over dim=0, the batch
    # dimension of size 1, always yields index 0 regardless of the outputs.
    prediction = out.argmax(dim=1).item()
    print(prediction)
# Train for three epochs (epoch numbers 1, 2, 3).
for epoch in range(1, 4):
    train(epoch)

# NOTE(review): the pasted code lost its indentation; this assumes the 100
# spot checks run once after training rather than after every epoch.
for _ in range(100):
    test()
在 TRAINDATAPATH 中有数千张文件名为 "obj_XXX.jpg" 的狗图像,以及一些文件名中不含 "obj" 的不带狗的图像。
在 TESTDATAPATH 中只是随机图像,一些有狗,一些没有。 NN 将它们全部分类为 "not including dogs" 或“0”,这是不正确的。
感谢您的帮助!
您做的是二分类,但却使用了两个类别:
isObj = 1 if 'obj' in f else 0
isNotObj = 0 if 'obj' in f else 1
target = [isObj, isNotObj]
在二分类的情况下,应该只有单个类别,其中 1 表示是狗,0 表示不是。你已经这样做了,只是做了两次。您可以完全删除 isNotObj,只保留 isObj。
您需要相应地调整模型,使其仅预测 isObj,因此 fc2 应该只有 1 个输出:
self.fc2 = nn.Linear(1000, 1)
在测试阶段需要根据这单个输出进行预测,它可以看作是"是狗"的概率。然后设置一个阈值,超过该阈值就认为模型有足够的信心判断它确实是一只狗。为了保持平衡,阈值取 0.5,高于该值的都算作狗,低于该值的都不算。这可以通过 torch.round 轻松实现:
# Size: [batch_size, 1]
out = model(data)
predictions = torch.round(out)
# Get rid of the singular dimension
# To get size: [batch_size]
predictions = predictions.squeeze(1)
除此之外,10的学习率已经是天文数字了,学习率大于1就无法收敛。更合适的学习率在 0.01 或 0.001 左右。
另外,由于您是 PyTorch 的新手:请不要使用 Variable,它在 2 年前发布的 PyTorch 0.4.0 中已被弃用,其所有功能都已合并到张量(Tensor)中。