How to call "backward" in a loop with 2 optimizers?
I am trying to update 2 networks:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal
import matplotlib.pyplot as plt
from tqdm import tqdm

softplus = torch.nn.Softplus()

class Model_RL(nn.Module):
    def __init__(self):
        super(Model_RL, self).__init__()
        self.fc1 = nn.Linear(3, 20)
        self.fc2 = nn.Linear(20, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = softplus(self.fc3(x))
        return x

class Model_FA(nn.Module):
    def __init__(self):
        super(Model_FA, self).__init__()
        self.fc1 = nn.Linear(1, 20)
        self.fc2 = nn.Linear(20, 30)
        self.fc3 = nn.Linear(30, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = softplus(self.fc3(x))
        return x

net_RL = Model_RL()
net_FA = Model_FA()
The training loop is:
inps = torch.tensor([[1.0]])
y = torch.tensor(10.0)

opt_RL = optim.Adam(net_RL.parameters())
opt_FA = optim.Adam(net_FA.parameters())

baseline = 0
baseline_lr = 0.1
epochs = 100

for _ in tqdm(range(epochs)):
    for inp in inps:
        with torch.no_grad():
            net_FA(inp)
        for layer in range(3):
            out_RL = net_RL(torch.tensor([1.0, 2.0, 3.0]))
            mu, std = out_RL
            dist = Normal(mu, std)
            update_values = dist.sample()
            log_p = dist.log_prob(update_values).mean()

            out = net_FA(inp)
            reward = -torch.square((y - out))
            baseline = (1 - baseline_lr) * baseline + baseline_lr * reward

            loss_RL = - (reward - baseline) * log_p
            opt_RL.zero_grad()
            opt_FA.zero_grad()
            loss_RL.backward()
            opt_RL.step()

            out = net_FA(inp)
            loss_FA = torch.mean(torch.square(y - out))
            opt_RL.zero_grad()
            opt_FA.zero_grad()
            loss_FA.backward()
            opt_FA.step()

print("Mean: " + str(mu.detach().numpy()) + ", Goal: " + str(y))
print("Standard deviation: " + str(softplus(std).detach().numpy()) + ", Goal: 0ish")
I am running into 2 main errors:
RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling .backward()...
When I add retain_graph=True to both backward calls, I get the following:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [30, 1]], which is output 0 of TBackward, is at version 5; expected version 4 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True)
My main question is: how can I get this training to work?
But I have two intermediate questions:

Why is retain_graph=True needed here at all, given that I am using a loop? From here: "There is no need to use retain_graph=True. In each loop, a new graph is created."

Why does retain_graph=True seem to make training significantly slower (when I remove the other backward call)? That does not make sense to me, since on every epoch a new computation graph should be created (rather than a single graph that keeps growing).
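As an illustration of the quoted claim (a hypothetical toy loop, not the code above): when the loss is recomputed from scratch on every iteration, each backward() consumes a graph that was just built, so retain_graph=True is never required.

import torch

w = torch.nn.Parameter(torch.randn(1))
opt = torch.optim.SGD([w], lr=0.1)
x = torch.tensor([2.0])
for _ in range(5):
    loss = (w * x - 1.0).pow(2).mean()  # fresh forward pass -> fresh graph
    opt.zero_grad()
    loss.backward()                     # works every iteration, no retain_graph needed
    opt.step()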
I think the line baseline = (1 - baseline_lr) * baseline + baseline_lr * reward is causing the error, because:

- The old state of baseline is used to compute the new state of baseline.
- PyTorch tracks all of those states inside the graph.
- backward flushes the graph.
- So the variable baseline at time t+1 tries to backpropagate through baseline at time t.
- But at time t+1, the graph behind baseline at time t no longer exists.
- This leads to the error (see the minimal sketch after this list).
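Here is a minimal sketch of that failure mode (hypothetical toy names, not the networks above): a running value keeps a reference to the previous iteration's graph, so the second backward() has to walk through a graph whose buffers were already freed.

import torch

net = torch.nn.Linear(1, 1)    # stand-in for net_FA
x = torch.tensor([[1.0]])
running = torch.tensor(0.)     # stand-in for baseline
for step in range(2):
    out = net(x)                           # fresh graph for this iteration
    running = 0.9 * running + 0.1 * out    # running still references the previous iteration's graph
    # fix: running = 0.9 * running.detach() + 0.1 * out
    loss = ((1.0 - out) * running).mean()
    loss.backward()  # step 0 succeeds; step 1 raises "Trying to backward through the graph a second time"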
Workaround:

Since you are not optimizing baseline, or anything that feeds into baseline:

- Initialize baseline as a torch tensor.
- Detach it from the graph before using it to update its own state.
Try this:
# initialize baseline as a torch tensor
baseline = torch.tensor(0.)
baseline_lr = 0.1
epochs = 100

for _ in tqdm(range(epochs)):
    for inp in inps:
        with torch.no_grad():
            net_FA(inp)
        for layer in range(3):
            out_RL = net_RL(torch.tensor([1.0, 2.0, 3.0]))
            mu, std = out_RL
            dist = Normal(mu, std)
            update_values = dist.sample()
            log_p = dist.log_prob(update_values).mean()

            out = net_FA(inp)
            reward = -torch.square((y - out))
            # detach baseline from the graph before updating it
            baseline = (1 - baseline_lr) * baseline.detach() + baseline_lr * reward

            loss_RL = - (reward - baseline) * log_p
            opt_RL.zero_grad()
            opt_FA.zero_grad()
            loss_RL.backward()
            opt_RL.step()

            out = net_FA(inp)
            loss_FA = torch.mean(torch.square(y - out))
            opt_RL.zero_grad()
            opt_FA.zero_grad()
            loss_FA.backward()
            opt_FA.step()
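A related variant (my own suggestion, not part of the original answer): since baseline is just a running scalar statistic, it can also be kept as a plain Python float via reward.item(), so it never touches the autograd graph at all. Note this is not exactly equivalent to the fix above, because with .item() the current reward's contribution to baseline also stops carrying gradient. A minimal stand-alone sketch:

import torch

y = torch.tensor(10.0)
out = torch.tensor([7.5], requires_grad=True)   # stand-in for net_FA(inp)
reward = -torch.square(y - out)                 # 1-element tensor, tracked by autograd
baseline, baseline_lr = 0.0, 0.1
baseline = (1 - baseline_lr) * baseline + baseline_lr * reward.item()  # .item() -> plain float
print(type(baseline), baseline)                 # <class 'float'>, no graph attached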
But honestly, I do not understand why you are updating the networks 3 times with the same input?