Normalization of the dataset, Error: all elements of input should be between 0 and 1
Normalization of the dataset, Error: all elements of input should be between 0 and 1
当我尝试执行训练时,PyTorch 中的数据规范化出现了问题。首先需要说明的是,数据集由 3024 个信号窗口(单通道)组成,每个信号的长度为 5000 个样本,因此 CSV 文件的尺寸为 5000x3024。每个信号都有 1 个需要预测的标签。
这是我如何加载和规范化数据的代码:
class CSVDataset(Dataset):
    """Dataset of 1-channel signal windows loaded from a CSV file.

    The CSV is read with ``read_csv`` and transposed, so every CSV column
    becomes one row (one sample). The last value of each row is used as the
    label and the remaining values as the signal.

    After construction ``self.X`` has shape ``(n_samples, 1, n_features)``
    and ``self.y`` is a 1-D float array of label-encoded class indices.

    NOTE(review): ``__len__``, ``__getitem__`` and the ``get_splits`` method
    that callers use are not visible in this snippet -- confirm they exist.
    """

    def __init__(self, path, normalize=False):
        """Load the CSV at *path* and optionally min-max scale the data.

        Args:
            path: Location of the CSV file, passed straight to ``read_csv``.
            normalize: When true, scale every signal window to [0, 1] using
                that window's own minimum and maximum, and scale the labels
                to [0, 1] before they are label-encoded.
        """
        # load the csv file as a dataframe, one sample per row
        values = read_csv(path).transpose().values
        # store the inputs and outputs
        self.X = values[:, :-1]
        self.y = values[:, -1]
        print("Dataset length: ", self.X.shape[0])
        # The ``np.float`` alias was removed in NumPy 1.24; the builtin
        # ``float`` selects the same float64 dtype on every NumPy version.
        self.X = self.X.astype(float)
        self.y = self.y.astype(float)
        if normalize:
            # Reduce along axis 1 so each window is scaled independently.
            # reshape() must not be used to swap the axes: it only
            # reinterprets the buffer and would mix samples of different
            # windows together.
            row_min = self.X.min(axis=1, keepdims=True)
            row_span = self.X.max(axis=1, keepdims=True) - row_min
            # A constant window has zero span; divide by 1 so it maps to 0
            # instead of producing NaN.
            row_span[row_span == 0] = 1.0
            self.X = (self.X - row_min) / row_span
            y_min = self.y.min()
            y_span = (self.y.max() - y_min) or 1.0
            self.y = (self.y - y_min) / y_span
        # add the channel axis: (n_samples, 1, n_features)
        self.X = self.X[:, np.newaxis, :]
        # Label encode the target and ensure the values are floats. The
        # encoder wants a 1-D array, so y is passed without a column reshape.
        self.y = LabelEncoder().fit_transform(self.y).astype(float)
def prepare_data(path):
    """Build the train and test data loaders for the CSV file at *path*.

    The dataset is created with normalization enabled and split through its
    ``get_splits()`` method (defined outside this snippet).

    Returns:
        A ``(train_dl, test_dl)`` tuple. The training loader shuffles and
        uses batches of 32; the test loader keeps order and uses batches
        of 1024.
    """
    train_part, test_part = CSVDataset(path, normalize=True).get_splits()
    return (
        DataLoader(train_part, batch_size=32, shuffle=True),
        DataLoader(test_part, batch_size=1024, shuffle=False),
    )
而训练方法是:
def train_model(train_dl, model):
    """Train *model* in place on the batches produced by *train_dl*.

    Runs 100 epochs of SGD (lr=0.01, momentum=0.9) minimizing ``BCELoss``.

    Args:
        train_dl: Iterable yielding ``(inputs, targets)`` tensor pairs.
        model: Module to optimize; its parameters are cast to float32.

    NOTE: ``BCELoss`` requires every element of the model output to lie in
    [0, 1], otherwise it raises "all elements of input should be between 0
    and 1". If the model emits raw logits, end it with a sigmoid layer or
    switch the criterion to ``BCEWithLogitsLoss``.
    """
    model = model.float()
    # define the optimization
    criterion = BCELoss()
    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
    # enumerate epochs
    for _ in range(100):
        # enumerate mini batches
        for inputs, targets in train_dl:
            # Infer the batch dimension instead of hard-coding 32, so a
            # smaller final batch (or another batch size) does not crash.
            targets = targets.reshape(-1, 1)
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs.float())
            # calculate loss
            loss = criterion(yhat, targets.float())
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
我得到的错误在行 loss = criterion(yhat, targets.float())
中,它说:
RuntimeError: all elements of input should be between 0 and 1
我已经尝试检查变量资源管理器中的 X,似乎没有任何值不在 0 和 1 之间。我不知道我在规范化中可能做错了什么。你能帮帮我吗?
内置损失函数用“输入(input)”和“目标(target)”分别指代预测值和标签。错误消息中的 input 应理解为“损失函数(criterion)的输入”,即 yhat
,而不是“模型的输入”。
似乎 yhat 的取值不在 [0, 1] 范围内,而 BCELoss 期望的输入是概率,而不是 logits。你可以:
- 在模型末尾添加一个 sigmoid 层作为最后一层,或者
- 改用 nn.BCEWithLogitsLoss,它把 sigmoid 和 BCE 损失结合在一起。
当我尝试执行训练时,PyTorch 中的数据规范化出现了问题。首先需要说明的是,数据集由 3024 个信号窗口(单通道)组成,每个信号的长度为 5000 个样本,因此 CSV 文件的尺寸为 5000x3024。每个信号都有 1 个需要预测的标签。 这是我加载和规范化数据的代码:
class CSVDataset(Dataset):
    """Dataset of 1-channel signal windows loaded from a CSV file.

    The CSV is read with ``read_csv`` and transposed, so every CSV column
    becomes one row (one sample). The last value of each row is used as the
    label and the remaining values as the signal.

    After construction ``self.X`` has shape ``(n_samples, 1, n_features)``
    and ``self.y`` is a 1-D float array of label-encoded class indices.

    NOTE(review): ``__len__``, ``__getitem__`` and the ``get_splits`` method
    that callers use are not visible in this snippet -- confirm they exist.
    """

    def __init__(self, path, normalize=False):
        """Load the CSV at *path* and optionally min-max scale the data.

        Args:
            path: Location of the CSV file, passed straight to ``read_csv``.
            normalize: When true, scale every signal window to [0, 1] using
                that window's own minimum and maximum, and scale the labels
                to [0, 1] before they are label-encoded.
        """
        # load the csv file as a dataframe, one sample per row
        values = read_csv(path).transpose().values
        # store the inputs and outputs
        self.X = values[:, :-1]
        self.y = values[:, -1]
        print("Dataset length: ", self.X.shape[0])
        # The ``np.float`` alias was removed in NumPy 1.24; the builtin
        # ``float`` selects the same float64 dtype on every NumPy version.
        self.X = self.X.astype(float)
        self.y = self.y.astype(float)
        if normalize:
            # Reduce along axis 1 so each window is scaled independently.
            # reshape() must not be used to swap the axes: it only
            # reinterprets the buffer and would mix samples of different
            # windows together.
            row_min = self.X.min(axis=1, keepdims=True)
            row_span = self.X.max(axis=1, keepdims=True) - row_min
            # A constant window has zero span; divide by 1 so it maps to 0
            # instead of producing NaN.
            row_span[row_span == 0] = 1.0
            self.X = (self.X - row_min) / row_span
            y_min = self.y.min()
            y_span = (self.y.max() - y_min) or 1.0
            self.y = (self.y - y_min) / y_span
        # add the channel axis: (n_samples, 1, n_features)
        self.X = self.X[:, np.newaxis, :]
        # Label encode the target and ensure the values are floats. The
        # encoder wants a 1-D array, so y is passed without a column reshape.
        self.y = LabelEncoder().fit_transform(self.y).astype(float)
def prepare_data(path):
    """Build the train and test data loaders for the CSV file at *path*.

    The dataset is created with normalization enabled and split through its
    ``get_splits()`` method (defined outside this snippet).

    Returns:
        A ``(train_dl, test_dl)`` tuple. The training loader shuffles and
        uses batches of 32; the test loader keeps order and uses batches
        of 1024.
    """
    train_part, test_part = CSVDataset(path, normalize=True).get_splits()
    return (
        DataLoader(train_part, batch_size=32, shuffle=True),
        DataLoader(test_part, batch_size=1024, shuffle=False),
    )
而训练方法是:
def train_model(train_dl, model):
    """Train *model* in place on the batches produced by *train_dl*.

    Runs 100 epochs of SGD (lr=0.01, momentum=0.9) minimizing ``BCELoss``.

    Args:
        train_dl: Iterable yielding ``(inputs, targets)`` tensor pairs.
        model: Module to optimize; its parameters are cast to float32.

    NOTE: ``BCELoss`` requires every element of the model output to lie in
    [0, 1], otherwise it raises "all elements of input should be between 0
    and 1". If the model emits raw logits, end it with a sigmoid layer or
    switch the criterion to ``BCEWithLogitsLoss``.
    """
    model = model.float()
    # define the optimization
    criterion = BCELoss()
    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
    # enumerate epochs
    for _ in range(100):
        # enumerate mini batches
        for inputs, targets in train_dl:
            # Infer the batch dimension instead of hard-coding 32, so a
            # smaller final batch (or another batch size) does not crash.
            targets = targets.reshape(-1, 1)
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs.float())
            # calculate loss
            loss = criterion(yhat, targets.float())
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
我得到的错误在行 loss = criterion(yhat, targets.float())
中,它说:
RuntimeError: all elements of input should be between 0 and 1
我已经尝试检查变量资源管理器中的 X,似乎没有任何值不在 0 和 1 之间。我不知道我在规范化中可能做错了什么。你能帮帮我吗?
内置损失函数用“输入(input)”和“目标(target)”分别指代预测值和标签。错误消息中的 input 应理解为“损失函数(criterion)的输入”,即 yhat
,而不是“模型的输入”。
似乎 yhat 的取值不在 [0, 1] 范围内,而 BCELoss 期望的输入是概率,而不是 logits。你可以:
- 在模型末尾添加一个 sigmoid 层作为最后一层,或者
- 改用 nn.BCEWithLogitsLoss,它把 sigmoid 和 BCE 损失结合在一起。