构建带有真实标签(ground truth)的非图像分类器
Building a non-image classifier with ground truths
我有一个如下所示的数据集:
标签基本上是我得到的项目列表(比方说,停车场里的汽车),总共有 10 个,标记为 0 到 10。我有 14 个类别(比方说,14 个不同的汽车品牌)。每个浮点值表示该项目属于对应类别的概率。例如,项目 2 属于类别 2 的概率可能为 0.995275:
print(set(list(df['label'])))
> {0, 1, 2, 3, 4, 5, 6, 7, 9}
我的目标是训练一个分类器,输出一个 0 到 14 之间的整数,用来预测标签 x 属于哪个类别。
我正在尝试构建一个具有 3 个隐藏层(+ 输入和输出层)的前馈 NN,并采用 15 个输入并输出从 0 到 14 的预测。这就是我到目前为止的设计:
class NNO(nn.Module):
    """Feed-forward network with three hidden layers.

    Takes 15 input features and returns a probability distribution over
    15 classes (0 to 14) for every row of the batch.

    NOTE: the final softmax makes the outputs probabilities. If the model
    is trained with ``nn.CrossEntropyLoss``, which expects raw logits,
    remove the softmax from ``forward``.
    """

    def __init__(self):
        super(NNO, self).__init__()
        # Each layer needs its own attribute; re-assigning a single
        # ``self.hidden`` would keep only the last layer.
        self.hidden1 = nn.Linear(15, 20)
        self.hidden2 = nn.Linear(20, 20)
        self.hidden3 = nn.Linear(20, 20)
        self.output = nn.Linear(20, 15)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, y):
        """Run a batch ``y`` of shape (batch, 15) through the network.

        Returns a tensor of shape (batch, 15) whose rows sum to 1.
        """
        x = self.sigmoid(self.hidden1(y))
        x = self.sigmoid(self.hidden2(x))
        x = self.sigmoid(self.hidden3(x))
        x = self.output(x)
        return self.softmax(x)
我的问题是:如何把这个数据集输入我的神经网络,开始按轮次(epoch)进行训练?我找不到任何与此类数据集相关的资料。
答案如下:
# Build a synthetic dataset: 1000 rows, each with 14 random scores in
# [0, 1) and a random integer label in [0, 14).
label = np.random.randint(low=0, high=14, size=1000)
random = np.random.random(size=(1000, 14))
total = pd.DataFrame(
    random,
    columns=[f'{i}_col' for i in range(14)],
)
total['label'] = label
'''
As I understand it, you need a single output class, namely the one with the highest probability, so this is a multi-class classification problem. For this dummy data, I simply use the index of the highest value in `random` as the target class.
'''
class TDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(features, target)`` pairs from a DataFrame.

    The features of a row are its ``{i}_col`` score columns followed by
    its ``label`` value; the target is the index of the largest score.

    Args:
        df: DataFrame with columns ``0_col`` .. ``{num_classes - 1}_col``
            and ``label``.
        num_classes: number of score columns (defaults to 14).
    """

    def __init__(self, df, num_classes=14):
        score_cols = [f'{i}_col' for i in range(num_classes)]
        self.inputs = df[score_cols + ['label']].values
        self.outputs = df[score_cols].values

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        x = torch.tensor(self.inputs[idx], dtype=torch.float)
        # Class-index targets must be integer (long) tensors for
        # nn.CrossEntropyLoss, so make the dtype explicit.
        y = torch.tensor(np.argmax(self.outputs[idx]), dtype=torch.long)
        return x, y
# Wrap the DataFrame in the dataset and serve it in mini-batches of 64.
ds = TDataset(df=total)
dl = torch.utils.data.DataLoader(dataset=ds, batch_size=64)
# Next, create a model that takes 15 inputs and
# produces 14 outputs, which in my case are the class logits
class NNO(nn.Module):
    """Small feed-forward classifier: Linear -> ReLU -> Linear.

    ``forward`` returns raw logits (no softmax), which is the input
    ``nn.CrossEntropyLoss`` expects.

    Args:
        in_features: number of input features per sample (default 15).
        hidden_features: width of the hidden layer (default 20).
        num_classes: number of output logits (default 14).
    """

    def __init__(self, in_features=15, hidden_features=20, num_classes=14):
        super(NNO, self).__init__()
        self.hidden = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Map a (batch, in_features) tensor to (batch, num_classes) logits."""
        x = self.hidden(x)
        x = self.relu(x)
        x = self.output(x)
        return x
# Now we create the model object
m = NNO()

# Take the first batch from the loader to sanity-check the output shape.
# The None default leaves `sample` as None if the loader is empty.
sample = next(iter(dl), None)
print(m(sample[0]).shape)  # shape = [64, 14] as desired.
# Now we define the loss function and then the optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(m.parameters())

# Now we define the training loop
for i in range(500):  # for 500 epochs
    epoch_loss = 0.0
    for inputs, targets in dl:  # change accordingly for your data
        optimizer.zero_grad()
        preds = m(inputs)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()
        # Accumulate a plain Python float: adding the loss tensor itself
        # would keep every batch's autograd history alive for the epoch.
        epoch_loss += loss.item()
    # Report the mean batch loss every 50 epochs.
    if i % 50 == 0:
        print('loss: ', epoch_loss / len(dl))
'''
At inference time, you just need to apply softmax to the model's outputs and select the most probable class.
'''
# Inference needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    preds = m(sample[0])
# Softmax over the class dimension; calling softmax without `dim` is
# deprecated and emits a warning.
predicted_classes = torch.argmax(torch.nn.functional.softmax(preds, dim=1), dim=1)
# Here the predicted classes are the desired final output.
我有一个如下所示的数据集:
标签基本上是我得到的项目列表(比方说,停车场里的汽车),总共有 10 个,标记为 0 到 10。我有 14 个类别(比方说,14 个不同的汽车品牌)。每个浮点值表示该项目属于对应类别的概率。例如,项目 2 属于类别 2 的概率可能为 0.995275:
print(set(list(df['label'])))
> {0, 1, 2, 3, 4, 5, 6, 7, 9}
我的目标是训练一个分类器,输出一个 0 到 14 之间的整数,用来预测标签 x 属于哪个类别。
我正在尝试构建一个具有 3 个隐藏层(+ 输入和输出层)的前馈 NN,并采用 15 个输入并输出从 0 到 14 的预测。这就是我到目前为止的设计:
class NNO(nn.Module):
    """Feed-forward network with three hidden layers.

    Takes 15 input features and returns a probability distribution over
    15 classes (0 to 14) for every row of the batch.

    NOTE: the final softmax makes the outputs probabilities. If the model
    is trained with ``nn.CrossEntropyLoss``, which expects raw logits,
    remove the softmax from ``forward``.
    """

    def __init__(self):
        super(NNO, self).__init__()
        # Each layer needs its own attribute; re-assigning a single
        # ``self.hidden`` would keep only the last layer.
        self.hidden1 = nn.Linear(15, 20)
        self.hidden2 = nn.Linear(20, 20)
        self.hidden3 = nn.Linear(20, 20)
        self.output = nn.Linear(20, 15)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, y):
        """Run a batch ``y`` of shape (batch, 15) through the network.

        Returns a tensor of shape (batch, 15) whose rows sum to 1.
        """
        x = self.sigmoid(self.hidden1(y))
        x = self.sigmoid(self.hidden2(x))
        x = self.sigmoid(self.hidden3(x))
        x = self.output(x)
        return self.softmax(x)
我的问题是:如何把这个数据集输入我的神经网络,开始按轮次(epoch)进行训练?我找不到任何与此类数据集相关的资料。
答案如下:
# Build a synthetic dataset: 1000 rows, each with 14 random scores in
# [0, 1) and a random integer label in [0, 14).
label = np.random.randint(low=0, high=14, size=1000)
random = np.random.random(size=(1000, 14))
total = pd.DataFrame(
    random,
    columns=[f'{i}_col' for i in range(14)],
)
total['label'] = label
'''
As I understand it, you need a single output class, namely the one with the highest probability, so this is a multi-class classification problem. For this dummy data, I simply use the index of the highest value in `random` as the target class.
'''
class TDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(features, target)`` pairs from a DataFrame.

    The features of a row are its ``{i}_col`` score columns followed by
    its ``label`` value; the target is the index of the largest score.

    Args:
        df: DataFrame with columns ``0_col`` .. ``{num_classes - 1}_col``
            and ``label``.
        num_classes: number of score columns (defaults to 14).
    """

    def __init__(self, df, num_classes=14):
        score_cols = [f'{i}_col' for i in range(num_classes)]
        self.inputs = df[score_cols + ['label']].values
        self.outputs = df[score_cols].values

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        x = torch.tensor(self.inputs[idx], dtype=torch.float)
        # Class-index targets must be integer (long) tensors for
        # nn.CrossEntropyLoss, so make the dtype explicit.
        y = torch.tensor(np.argmax(self.outputs[idx]), dtype=torch.long)
        return x, y
# Wrap the DataFrame in the dataset and serve it in mini-batches of 64.
ds = TDataset(df=total)
dl = torch.utils.data.DataLoader(dataset=ds, batch_size=64)
# Next, create a model that takes 15 inputs and
# produces 14 outputs, which in my case are the class logits
class NNO(nn.Module):
    """Small feed-forward classifier: Linear -> ReLU -> Linear.

    ``forward`` returns raw logits (no softmax), which is the input
    ``nn.CrossEntropyLoss`` expects.

    Args:
        in_features: number of input features per sample (default 15).
        hidden_features: width of the hidden layer (default 20).
        num_classes: number of output logits (default 14).
    """

    def __init__(self, in_features=15, hidden_features=20, num_classes=14):
        super(NNO, self).__init__()
        self.hidden = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Map a (batch, in_features) tensor to (batch, num_classes) logits."""
        x = self.hidden(x)
        x = self.relu(x)
        x = self.output(x)
        return x
# Now we create the model object
m = NNO()

# Take the first batch from the loader to sanity-check the output shape.
# The None default leaves `sample` as None if the loader is empty.
sample = next(iter(dl), None)
print(m(sample[0]).shape)  # shape = [64, 14] as desired.
# Now we define the loss function and then the optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(m.parameters())

# Now we define the training loop
for i in range(500):  # for 500 epochs
    epoch_loss = 0.0
    for inputs, targets in dl:  # change accordingly for your data
        optimizer.zero_grad()
        preds = m(inputs)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()
        # Accumulate a plain Python float: adding the loss tensor itself
        # would keep every batch's autograd history alive for the epoch.
        epoch_loss += loss.item()
    # Report the mean batch loss every 50 epochs.
    if i % 50 == 0:
        print('loss: ', epoch_loss / len(dl))
'''
At inference time, you just need to apply softmax to the model's outputs and select the most probable class.
'''
# Inference needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    preds = m(sample[0])
# Softmax over the class dimension; calling softmax without `dim` is
# deprecated and emits a warning.
predicted_classes = torch.argmax(torch.nn.functional.softmax(preds, dim=1), dim=1)
# Here the predicted classes are the desired final output.