用一个简单的数据集理解 LSTM
Understanding LSTM with a simple dataset
我想确保我理解 LSTM,所以我使用 PyTorch 框架实现了一个虚拟示例。
作为输入,我使用长度为 10 的连续数字序列,预测值始终是序列的最后一个数字 + 1。例如:
x = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
y = 16
由于这是一项非常简单的预测任务,我希望该模型能够很好地运行,但我发现它的性能非常差。该模型按批次预测一个恒定值,该值在训练过程中不断增加。
我想知道我错过了什么。以下是我编写的代码 - 任何帮助将不胜感激。
from torch.utils.data import Dataset, TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch.nn as nn
import torch
class MyDataset(Dataset):
    """Synthetic dataset: sample ``index`` is the run of 10 consecutive
    integers ending at ``index``; the target is ``index + 1``."""

    def __init__(self):
        pass

    def __getitem__(self, index):
        # The 10 consecutive integers [index - 9, ..., index].
        x = torch.arange(index - 9, index + 1)
        y = torch.tensor(index + 1)
        return x, y

    def __len__(self):
        # Fixed-size dataset.
        return 1000
class LSTM(nn.Module):
    # NOTE(review): this is the broken model from the question, kept as-is;
    # the comments below flag the problems discussed further down the page.
    def __init__(self, hidden_layer_size=1, batch_size = 1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.batch_size = batch_size
        # Default nn.LSTM layout is sequence-first: (seq_len, batch, input_size).
        self.lstm = nn.LSTM(1, hidden_layer_size)
        # Maps the 10 per-step outputs to one scalar; this hard-codes
        # hidden_layer_size == 1 (for any other size the shapes won't match).
        self.linear = nn.Linear(10, 1)
        # Initial (h_0, c_0), each of shape (num_layers, batch, hidden_size).
        self.hidden_cell = (torch.zeros(1,self.batch_size,self.hidden_layer_size),
                            torch.zeros(1,self.batch_size,self.hidden_layer_size))
    def forward(self, input_seq):
        # BUG: input_seq arrives batch-first as (batch, 10). view(10, batch, -1)
        # does NOT transpose it — it only reinterprets the memory layout — so
        # each "sequence" handed to the LSTM mixes numbers from different samples.
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(10 ,self.batch_size, -1), self.hidden_cell)
        # squeeze().T turns (10, batch, 1) into (batch, 10) for the linear layer;
        # the result keeps shape (batch, 1), which the answer later squeezes.
        predictions = self.linear(lstm_out.squeeze().T)
        return predictions
batch_size = 32
epochs = 1000

# Data: random sampling over the synthetic dataset, full batches only.
train = MyDataset()
sampler = RandomSampler(train)
train_dataloader = DataLoader(train, sampler=sampler, batch_size=batch_size, drop_last=True)

# Model, loss and optimiser.
model = LSTM(batch_size=batch_size)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for e in range(epochs):
    for step, (seq, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        # Reset the recurrent state each batch so no graph is carried over.
        model.hidden_cell = (
            torch.zeros(1, batch_size, model.hidden_layer_size),
            torch.zeros(1, batch_size, model.hidden_layer_size),
        )
        y_pred = model(seq.float())
        print(y_pred)
        single_loss = loss_function(y_pred, labels.float())
        single_loss.backward()
        optimizer.step()
您的 forward 函数存在多个问题。请看传递给 LSTM 的输入:
input_seq = input_seq.view(10 ,self.batch_size, -1)
print(input_seq[:, 0])
>>> tensor([[168.],
[ 21.],
[450.],
[436.],
[789.],
[941.],
[ -7.],
[811.],
[789.],
[992.]])
这是一串被打乱的随机数字。您要么需要转置 input_seq,要么(更好的做法是)给 LSTM 构造函数传入 batch_first=True,然后在把 input_seq 传给 LSTM 之前只需对它做一次 unsqueeze。
您还必须相应地处理 lstm_out:现在唯一需要的操作是把它 reshape 成 [batch_size x (10 * hidden_size)]。
最后,你需要squeeze
线性层的输出。
除此之外,LSTM的hidden size太小,用10(甚至100)代替1,模型在1000个epochs才收敛。
这是更新后的代码:
class LSTM(nn.Module):
    """LSTM regressor: takes a batch of length-10 sequences (batch-first)
    and predicts one scalar per sequence."""

    def __init__(self, hidden_layer_size=100, batch_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.batch_size = batch_size
        # batch_first=True -> input layout is (batch, seq_len, features).
        self.lstm = nn.LSTM(1, hidden_layer_size, batch_first=True)
        # One prediction from the concatenation of all 10 step outputs.
        self.linear = nn.Linear(10 * hidden_layer_size, 1)
        # Initial (h_0, c_0), each of shape (num_layers, batch, hidden_size).
        h0 = torch.zeros(1, self.batch_size, self.hidden_layer_size)
        c0 = torch.zeros(1, self.batch_size, self.hidden_layer_size)
        self.hidden_cell = (h0, c0)

    def forward(self, input_seq):
        n = input_seq.size(0)
        # (batch, 10) -> (batch, 10, 1): one scalar feature per time step.
        stepwise = input_seq.unsqueeze(2)
        out, self.hidden_cell = self.lstm(stepwise, self.hidden_cell)
        # Flatten every per-step hidden state into one feature vector.
        flat = out.reshape(n, -1)
        # Drop the trailing singleton dim: (batch, 1) -> (batch,).
        return self.linear(flat).squeeze()
我想确保我理解 LSTM,所以我使用 PyTorch 框架实现了一个虚拟示例。
作为输入,我使用长度为 10 的连续数字序列,预测值始终是序列的最后一个数字 + 1。例如:
x = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
y = 16
由于这是一项非常简单的预测任务,我希望该模型能够很好地运行,但我发现它的性能非常差。该模型按批次预测一个恒定值,该值在训练过程中不断增加。
我想知道我错过了什么。以下是我编写的代码 - 任何帮助将不胜感激。
from torch.utils.data import Dataset, TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch.nn as nn
import torch
class MyDataset(Dataset):
    """Synthetic dataset: sample ``index`` is the run of 10 consecutive
    integers ending at ``index``; the target is ``index + 1``."""

    def __init__(self):
        pass

    def __getitem__(self, index):
        # The 10 consecutive integers [index - 9, ..., index].
        x = torch.arange(index - 9, index + 1)
        y = torch.tensor(index + 1)
        return x, y

    def __len__(self):
        # Fixed-size dataset.
        return 1000
class LSTM(nn.Module):
    # NOTE(review): this is the broken model from the question, kept as-is;
    # the comments below flag the problems discussed further down the page.
    def __init__(self, hidden_layer_size=1, batch_size = 1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.batch_size = batch_size
        # Default nn.LSTM layout is sequence-first: (seq_len, batch, input_size).
        self.lstm = nn.LSTM(1, hidden_layer_size)
        # Maps the 10 per-step outputs to one scalar; this hard-codes
        # hidden_layer_size == 1 (for any other size the shapes won't match).
        self.linear = nn.Linear(10, 1)
        # Initial (h_0, c_0), each of shape (num_layers, batch, hidden_size).
        self.hidden_cell = (torch.zeros(1,self.batch_size,self.hidden_layer_size),
                            torch.zeros(1,self.batch_size,self.hidden_layer_size))
    def forward(self, input_seq):
        # BUG: input_seq arrives batch-first as (batch, 10). view(10, batch, -1)
        # does NOT transpose it — it only reinterprets the memory layout — so
        # each "sequence" handed to the LSTM mixes numbers from different samples.
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(10 ,self.batch_size, -1), self.hidden_cell)
        # squeeze().T turns (10, batch, 1) into (batch, 10) for the linear layer;
        # the result keeps shape (batch, 1), which the answer later squeezes.
        predictions = self.linear(lstm_out.squeeze().T)
        return predictions
batch_size = 32
epochs = 1000

# Data: random sampling over the synthetic dataset, full batches only.
train = MyDataset()
sampler = RandomSampler(train)
train_dataloader = DataLoader(train, sampler=sampler, batch_size=batch_size, drop_last=True)

# Model, loss and optimiser.
model = LSTM(batch_size=batch_size)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for e in range(epochs):
    for step, (seq, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        # Reset the recurrent state each batch so no graph is carried over.
        model.hidden_cell = (
            torch.zeros(1, batch_size, model.hidden_layer_size),
            torch.zeros(1, batch_size, model.hidden_layer_size),
        )
        y_pred = model(seq.float())
        print(y_pred)
        single_loss = loss_function(y_pred, labels.float())
        single_loss.backward()
        optimizer.step()
您的 forward 函数存在多个问题。请看传递给 LSTM 的输入:
input_seq = input_seq.view(10 ,self.batch_size, -1)
print(input_seq[:, 0])
>>> tensor([[168.],
[ 21.],
[450.],
[436.],
[789.],
[941.],
[ -7.],
[811.],
[789.],
[992.]])
这是一串被打乱的随机数字。您要么需要转置 input_seq,要么(更好的做法是)给 LSTM 构造函数传入 batch_first=True,然后在把 input_seq 传给 LSTM 之前只需对它做一次 unsqueeze。
您还必须相应地处理 lstm_out:现在唯一需要的操作是把它 reshape 成 [batch_size x (10 * hidden_size)]。
最后,你需要squeeze
线性层的输出。
除此之外,LSTM的hidden size太小,用10(甚至100)代替1,模型在1000个epochs才收敛。 这是更新后的代码:
class LSTM(nn.Module):
    """LSTM regressor: takes a batch of length-10 sequences (batch-first)
    and predicts one scalar per sequence."""

    def __init__(self, hidden_layer_size=100, batch_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.batch_size = batch_size
        # batch_first=True -> input layout is (batch, seq_len, features).
        self.lstm = nn.LSTM(1, hidden_layer_size, batch_first=True)
        # One prediction from the concatenation of all 10 step outputs.
        self.linear = nn.Linear(10 * hidden_layer_size, 1)
        # Initial (h_0, c_0), each of shape (num_layers, batch, hidden_size).
        h0 = torch.zeros(1, self.batch_size, self.hidden_layer_size)
        c0 = torch.zeros(1, self.batch_size, self.hidden_layer_size)
        self.hidden_cell = (h0, c0)

    def forward(self, input_seq):
        n = input_seq.size(0)
        # (batch, 10) -> (batch, 10, 1): one scalar feature per time step.
        stepwise = input_seq.unsqueeze(2)
        out, self.hidden_cell = self.lstm(stepwise, self.hidden_cell)
        # Flatten every per-step hidden state into one feature vector.
        flat = out.reshape(n, -1)
        # Drop the trailing singleton dim: (batch, 1) -> (batch,).
        return self.linear(flat).squeeze()