Freezing layers in a neural network in PyTorch
I have a cascade of neural networks in which the output of the first network becomes the input of the second. The first network is pretrained, so I just initialize it with those pretrained weights. However, I want to freeze the first network so that only the weights of the second network are updated during training. How can I do that? My networks look like this:
### First network
import torch
import torch.nn as nn
from functools import reduce
class LambdaBase(nn.Sequential):
def __init__(self, fn, *args):
super(LambdaBase, self).__init__(*args)
self.lambda_func = fn
def forward_prepare(self, input):
output = []
for module in self._modules.values():
output.append(module(input))
return output if output else input
class Lambda(LambdaBase):
def forward(self, input):
return self.lambda_func(self.forward_prepare(input))
class LambdaMap(LambdaBase):
def forward(self, input):
return list(map(self.lambda_func,self.forward_prepare(input)))
class LambdaReduce(LambdaBase):
def forward(self, input):
return reduce(self.lambda_func,self.forward_prepare(input))
def get_first_model(load_weights = True):
pretrained_model_reloaded_th = nn.Sequential( # Sequential,
nn.Conv2d(4,300,(19, 1)),
nn.BatchNorm2d(300),
nn.ReLU(),
nn.MaxPool2d((3, 1),(3, 1)),
nn.Conv2d(300,200,(11, 1)),
nn.BatchNorm2d(200),
nn.ReLU(),
nn.MaxPool2d((4, 1),(4, 1)),
nn.Conv2d(200,200,(7, 1)),
nn.BatchNorm2d(200),
nn.ReLU(),
nn.MaxPool2d((4, 1),(4, 1)),
Lambda(lambda x: x.view(x.size(0),-1)), # Reshape,
nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(2000,1000)), # Linear,
nn.BatchNorm1d(1000,1e-05,0.1,True),#BatchNorm1d,
nn.ReLU(),
nn.Dropout(0.3),
nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(1000,1000)), # Linear,
nn.BatchNorm1d(1000,1e-05,0.1,True),#BatchNorm1d,
nn.ReLU(),
nn.Dropout(0.3),
nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(1000,164)), # Linear,
nn.Sigmoid(),
)
if load_weights:
sd = torch.load('pretrained_model.pth')
pretrained_model_reloaded_th.load_state_dict(sd)
return pretrained_model_reloaded_th
### second network
def next_model_architecture():
next_model = nn.Sequential(
nn.Linear(164, 64),
nn.ReLU(),
nn.Linear(64, 1),
nn.Sigmoid())
return next_model
### joining two networks
def cascading_model(first_model,next_model):
network = nn.Sequential(first_model, next_model)
return network
first_model = get_first_model(load_weights = True)
next_model = next_model_architecture()
network = cascading_model(first_model,next_model)
If I do:
first_model = first_model.eval()
will this freeze my first neural network and update only the second network's weights during training?
Calling first_model.eval() does not freeze the network; it only switches layers such as Dropout and BatchNorm to evaluation mode, and the weights can still be updated during training. You freeze a parameter by setting its .requires_grad to False. You can do this by iterating over all parameters of the module you want to freeze:
for p in first_model.parameters():
p.requires_grad = False
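As a quick sanity check (my addition, not part of the original answer), you can count how many parameters still require gradients before and after freezing; count_trainable below is just a hypothetical helper:

def count_trainable(module):
    # Number of scalar weights that will still receive gradients
    return sum(p.numel() for p in module.parameters() if p.requires_grad)

print(count_trainable(first_model))  # expected: 0 after freezing
print(count_trainable(next_model))   # the second network stays trainable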
You can also freeze the parameters without iterating, by calling requires_grad_ on the module itself. In your case that would be:
# Freezing network Sequential at index 0
network[0].requires_grad_(False)
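Either way, once the first Sequential is frozen, you can build the optimizer only over the parameters that still require gradients, so the frozen weights are never handed to it. A minimal sketch, assuming the cascaded network defined above; the optimizer type and learning rate are placeholders:

# Only the second network's parameters remain trainable after the freeze
trainable_params = [p for p in network.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=1e-3)  # lr chosen only for illustration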
Usually, in more complex networks, you would have different submodules. For example, in your case, you could structure the network like this:
class Network(torch.nn.Module):
    def __init__(self, ...):
        super().__init__()  # required before registering submodules
        self.first_model = get_first_model(load_weights=True)
        self.next_model = next_model_architecture()

    def forward(self, x):
        x = self.first_model(x)
        x = self.next_model(x)
        return x
# Class instance
network = Network(...)
Then you can freeze one of the submodules like this:
# Freezing network submodule: first_model
network.first_model.requires_grad_(False)
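Putting it all together for the original cascading_model version of the network: below is a sketch (not from the original answer) of a training setup that freezes first_model, keeps it in eval mode so its BatchNorm statistics and Dropout stay fixed, and optimizes only next_model. Remember that .eval() alone does not stop weight updates, and that calling network.train() later would switch the frozen part back to train mode. The optimizer, learning rate, loss, and dataloader are assumptions for illustration:

# Rebuild the cascade exactly as in the question
first_model = get_first_model(load_weights=True)
next_model = next_model_architecture()
network = cascading_model(first_model, next_model)

# Freeze the pretrained network and keep its BatchNorm/Dropout in eval mode
network[0].requires_grad_(False)
network[0].eval()

# Optimize only the second network's parameters
optimizer = torch.optim.Adam(network[1].parameters(), lr=1e-3)  # placeholder optimizer/lr
criterion = nn.BCELoss()  # placeholder loss for the sigmoid output

for x, y in dataloader:  # dataloader is assumed to exist
    optimizer.zero_grad()
    pred = network(x)
    loss = criterion(pred, y.float().view_as(pred))
    loss.backward()   # frozen parameters receive no gradients
    optimizer.step()  # updates only next_model's weights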