用一个简单的数据集理解 LSTM

Understanding LSTM with a simple dataset

我想确保我理解 LSTM,所以我使用 Pytorch 框架实现了一个虚拟示例。 作为输入,我使用长度为 10 的连续数字序列,预测值始终是序列的最后一个数字 + 1。例如:
x = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
y = 16

由于这是一项非常简单的预测任务,我希望该模型能够很好地运行,但我发现它的性能非常差。该模型按批次预测一个恒定值,该值在训练过程中不断增加。

我想知道我错过了什么。以下是我编写的代码 - 任何帮助将不胜感激。

from torch.utils.data import Dataset, TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch.nn as nn
import torch

class MyDataset(Dataset):
    """Synthetic dataset: item ``i`` is the run of 10 consecutive integers
    ending at ``i``, and the target is the next integer, ``i + 1``."""

    def __init__(self):
        pass

    def __getitem__(self, index):
        # 10 consecutive integers ending at `index`, e.g. index=9 -> [0, 1, ..., 9].
        sequence = torch.arange(index - 9, index + 1)
        # Target is simply the next number in the run.
        target = torch.tensor(index + 1)
        return sequence, target

    def __len__(self):
        # Fixed-size dataset of 1000 items.
        return 1000
class LSTM(nn.Module):
    """LSTM regressor from the question.

    NOTE(review): this version is intentionally shown as-is — it contains the
    bugs the question asks about; see the comments in forward().
    """
    def __init__(self, hidden_layer_size=1, batch_size = 1):

        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.batch_size = batch_size 
        # input_size=1, default seq-first layout: expects (seq_len, batch, input_size).
        self.lstm = nn.LSTM(1, hidden_layer_size)
        # Maps the 10 per-timestep outputs to one prediction.
        self.linear = nn.Linear(10, 1)
        # Initial (h0, c0), shape (num_layers=1, batch, hidden).
        self.hidden_cell = (torch.zeros(1,self.batch_size,self.hidden_layer_size),
                            torch.zeros(1,self.batch_size,self.hidden_layer_size))

    def forward(self, input_seq):

        # BUG: input_seq arrives as (batch, 10); view(10, batch, -1) does NOT
        # transpose it — each "timestep" slice mixes values from different batch
        # items, so the LSTM effectively sees sequences of unrelated numbers.
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(10 ,self.batch_size, -1), self.hidden_cell)
        # Only works because hidden_layer_size == 1: squeeze() yields (10, batch)
        # and .T yields (batch, 10) for the linear layer.
        predictions = self.linear(lstm_out.squeeze().T)
        return predictions
# --- Training script (from the question) ---
batch_size = 32
epochs = 1000

# Randomly sampled batches of the synthetic dataset; drop_last keeps every
# batch at exactly `batch_size` so the model's fixed hidden-state shape fits.
train = MyDataset()
sampler = RandomSampler(train)
train_dataloader = DataLoader(train, sampler=sampler, batch_size= batch_size , drop_last = True)

model = LSTM(batch_size = batch_size)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for e in range(epochs):
    for step, batch in enumerate(train_dataloader) :

        seq, labels = batch
        optimizer.zero_grad()

        # Re-zero (h0, c0) before every batch so no state (or autograd graph)
        # is carried over between batches.
        model.hidden_cell = (torch.zeros(1, batch_size, model.hidden_layer_size),
                             torch.zeros(1, batch_size, model.hidden_layer_size))

        y_pred = model(seq.float())

        print(y_pred)

        single_loss = loss_function(y_pred, labels.float())
        single_loss.backward()
        optimizer.step()

您的 forward 函数存在多个问题。先来看传递给 LSTM 的输入:

input_seq = input_seq.view(10 ,self.batch_size, -1)
print(input_seq[:, 0])

>>> tensor([[168.],
        [ 21.],
        [450.],
        [436.],
        [789.],
        [941.],
        [ -7.],
        [811.],
        [789.],
        [992.]])

这是一串随机数。您要么必须转置 input_seq,或者更好的做法是:向 LSTM 构造函数传入 batch_first=True,然后在把 input_seq 传给 LSTM 之前只需对它做一次 unsqueeze。

您还必须更新对 lstm_out 的处理:现在唯一需要的操作是把它 reshape 成 [batch_size x (10 * hidden_size)]。

最后,你需要对线性层的输出做 squeeze。

除此之外,LSTM 的 hidden size 太小:把 1 换成 10(甚至 100),模型才能在 1000 个 epoch 内收敛。这是更新后的代码:

class LSTM(nn.Module):
    """Corrected LSTM regressor: takes a batch-first (batch, 10) float input
    and produces one scalar prediction per batch item."""

    def __init__(self, hidden_layer_size=100, batch_size = 1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.batch_size = batch_size
        # batch_first=True: the LSTM consumes (batch, seq_len, input_size).
        self.lstm = nn.LSTM(1, hidden_layer_size, batch_first=True)
        # All 10 per-timestep hidden states are flattened into one linear head.
        self.linear = nn.Linear(10 * hidden_layer_size, 1)
        # Initial (h0, c0); the training loop re-zeroes this before each batch.
        h0 = torch.zeros(1, self.batch_size, self.hidden_layer_size)
        self.hidden_cell = (h0, h0.clone())

    def forward(self, input_seq):
        """input_seq: (batch, 10) float tensor -> (batch,) predictions."""
        n_items = input_seq.size(0)
        # Add a trailing feature dimension: (batch, 10) -> (batch, 10, 1).
        lstm_out, self.hidden_cell = self.lstm(input_seq.unsqueeze(2), self.hidden_cell)
        # Flatten the per-timestep outputs: (batch, 10 * hidden_size).
        flat = lstm_out.reshape(n_items, -1)
        return self.linear(flat).squeeze()