Expected 4D tensor as input, got 2D tensor instead
I'm trying to build a neural network on top of the PyTorch pretrained network VGG16.
I know I need to adapt the classifier part of the network, so I have frozen the parameters to prevent backpropagation through them.
Code:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
import numpy as np
import time
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import datasets, transforms
import torchvision.models as models
from collections import OrderedDict

data_dir = 'flowers'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
test_dir = data_dir + '/test'

train_transforms = transforms.Compose([transforms.Resize(224),
                                       transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                            std=[0.229, 0.224, 0.225])])

validn_transforms = transforms.Compose([transforms.Resize(224),
                                        transforms.CenterCrop(224),
                                        transforms.ToTensor(),
                                        transforms.Normalize((0.485, 0.456, 0.406),
                                                             (0.229, 0.224, 0.225))])

test_transforms = transforms.Compose([transforms.Resize(224),
                                      transforms.RandomResizedCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406),
                                                           (0.229, 0.224, 0.225))])

train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
validn_data = datasets.ImageFolder(valid_dir, transform=validn_transforms)
test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

trainloader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
validnloader = torch.utils.data.DataLoader(validn_data, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=True)

model = models.vgg16(pretrained=True)
model

for param in model.parameters():
    param.requires_grad = False

classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(3*224*224, 10000)),
    ('relu', nn.ReLU()),
    ('fc2', nn.Linear(10000, 5000)),
    ('relu', nn.ReLU()),
    ('fc3', nn.Linear(5000, 102)),
    ('output', nn.LogSoftmax(dim=1))
]))

model.classifier = classifier
classifier

criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)
model.cuda()

epochs = 1
steps = 0
training_loss = 0
print_every = 300

for e in range(epochs):
    model.train()
    for images, labels in iter(trainloader):
        steps == 1
        images.resize_(32, 3*224*224)
        inputs = Variable(images.cuda())
        targets = Variable(labels.cuda())
        optimizer.zero_grad()
        output = model.forward(inputs)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        training_loss += loss.data[0]
        if steps % print_every == 0:
            print("Epoch: {}/{}... ".format(e+1, epochs),
                  "Loss: {:.4f}".format(training_loss/print_every))
            running_loss = 0
Traceback:
ValueError Traceback (most recent call last)
<ipython-input-17-30552f4b46e8> in <module>()
15 optimizer.zero_grad()
16
---> 17 output = model.forward(inputs)
18 loss = criterion(output, targets)
19 loss.backward()
/opt/conda/lib/python3.6/site-packages/torchvision-0.2.0-py3.6.egg/torchvision/models/vgg.py in forward(self, x)
39
40 def forward(self, x):
---> 41 x = self.features(x)
42 x = x.view(x.size(0), -1)
43 x = self.classifier(x)
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
323 for hook in self._forward_pre_hooks.values():
324 hook(self, input)
--> 325 result = self.forward(*input, **kwargs)
326 for hook in self._forward_hooks.values():
327 hook_result = hook(self, input, result)
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
65 def forward(self, input):
66 for module in self._modules.values():
---> 67 input = module(input)
68 return input
69
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
323 for hook in self._forward_pre_hooks.values():
324 hook(self, input)
--> 325 result = self.forward(*input, **kwargs)
326 for hook in self._forward_hooks.values():
327 hook_result = hook(self, input, result)
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/conv.py in forward(self, input)
275 def forward(self, input):
276 return F.conv2d(input, self.weight, self.bias, self.stride,
--> 277 self.padding, self.dilation, self.groups)
278
279
/opt/conda/lib/python3.6/site-packages/torch/nn/functional.py in conv2d(input, weight, bias, stride, padding, dilation, groups)
83 """
84 if input is not None and input.dim() != 4:
---> 85 raise ValueError("Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()))
86
87 f = _ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
ValueError: Expected 4D tensor as input, got 2D tensor instead.
Could it be because I'm using Linear operations in the layer definitions?
There are two problems with your network -

1. You created your own classifier whose first layer accepts an input of size (3*224*224), but that is not the output size of the features part of vgg16. The features output a tensor of size (25088).

2. You are resizing your input to a tensor of shape (3*224*224) (for each batch), but the features part of vgg16 expects an input of shape (3, 224, 224). Your custom classifier sits after the features, so you need to prepare the input for the features, not for the classifier.
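As a quick sanity check (a minimal sketch, not part of the original answer), you can push a dummy batch through the feature extractor and inspect the flattened size; 25088 is simply the (512, 7, 7) feature map that VGG16 produces for a 224x224 input:

import torch
import torchvision.models as models
from torch.autograd import Variable

model = models.vgg16(pretrained=True)

# a dummy batch of 2 RGB images at 224x224 - the 4D shape (N, 3, 224, 224)
# that the convolutional features expect
dummy = Variable(torch.randn(2, 3, 224, 224))
features = model.features(dummy)

print(features.size())                             # (2, 512, 7, 7)
print(features.view(features.size(0), -1).size())  # (2, 25088), the classifier's input size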
Solution
To fix the first problem, change the definition of your classifier to -
classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(25088, 10000)),
    ('relu1', nn.ReLU()),   # the two ReLUs need distinct names -
    ('fc2', nn.Linear(10000, 5000)),
    ('relu2', nn.ReLU()),   # duplicate keys in an OrderedDict overwrite each other
    ('fc3', nn.Linear(5000, 102)),
    ('output', nn.LogSoftmax(dim=1))
]))
To fix the second problem, change images.resize_(32, 3*224*224) to images.resize_(32, 3, 224, 224).
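For reference, here is a minimal sketch of the inner training loop with that change applied (continuing from the setup code in the question). Note that a DataLoader over ImageFolder already yields images of shape (batch, 3, 224, 224), so the reshape can simply be dropped; if you keep one, avoid hard-coding the batch size 32, since the last batch of an epoch may be smaller:

for images, labels in trainloader:
    # images already arrive as (batch, 3, 224, 224), so no resize is needed;
    # if you reshape anyway, let the batch dimension be inferred:
    # images = images.view(-1, 3, 224, 224)
    inputs = Variable(images.cuda())
    targets = Variable(labels.cuda())

    optimizer.zero_grad()
    output = model(inputs)
    loss = criterion(output, targets)
    loss.backward()
    optimizer.step()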
P.S. - A word of advice - having the first layer of your classifier output 10000 units is very large. You should keep it at around 4000, as in the original classifier (and it would be even better to reuse the original weights for just the first layer, since those have already proven to be good features).
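One way to follow that advice (a sketch, assuming torchvision's standard VGG16 layout, where model.classifier[0] is the pretrained 25088 -> 4096 linear layer; the hidden size of 1000 below is just illustrative):

model = models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

# keep the pretrained first fully-connected layer (25088 -> 4096);
# it stays frozen because of the loop above
pretrained_fc1 = model.classifier[0]

model.classifier = nn.Sequential(OrderedDict([
    ('fc1', pretrained_fc1),
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(4096, 1000)),
    ('relu2', nn.ReLU()),
    ('fc3', nn.Linear(1000, 102)),
    ('output', nn.LogSoftmax(dim=1))
]))

# optimize only the parameters that still require gradients,
# otherwise Adam would complain about the frozen fc1
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.classifier.parameters()), lr=0.001)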