如何通过一个损失函数拟合具有 2 个输出的模型?
How can I fit a model with 2 outputs through one loss function?
我有一个模型,它必须返回坐标,同时还必须返回对这些坐标的置信度。我的损失函数必须同时考虑目标坐标和目标可用性。我的损失函数大致如下:
def loss(targets, target_availabilities, preds, confidences):
    # Placeholder from the question: the real loss is meant to combine the
    # target coordinates, their availabilities, the predicted coordinates and
    # the predicted confidences into a single value.
    # NOTE(review): no local named `loss` is assigned here, so as written the
    # `return` hands back the function object itself; this is a sketch only.
    # my loss function goes here
    return loss
TensorFlow 的函数式 API 文档展示了如何让 2 个不同的输出分别经过各自的损失函数(或者经过同一个损失函数并返回 2 个损失值,每个损失值对应一对 y_true 和 y_pred)。我应该如何编译和拟合我的模型,使得单个损失函数能够同时接收目标(targets)、target_availabilities、预测值和置信度?
我建议使用自定义训练循环来实现这一点。它允许更大的灵活性。只要你 return 一个值,你就可以在你的损失函数中执行任何类型的计算。假设您想这样做:
transformed_output = (y_pred * confidence) + availability
您可以在自定义损失函数中实现这一点(假设您的神经网络架构 returns 这三个值):
def compute_loss(model, x, y, training):
    """Return a single loss value built from all three model outputs.

    The model is called once and is expected to return three tensors. The
    first is scaled element-wise by the third, the second is added on top,
    and the result is scored against ``y`` by the module-level
    ``loss_object``.
    """
    logits, availability, confidence = model(inputs=x, training=training)
    # Same arithmetic as tf.add(tf.multiply(...), ...), written with operators.
    adjusted = logits * confidence + availability
    return loss_object(y_true=y, y_pred=adjusted)
这将 return 一个值,无论它是什么,Tensorflow 都会尝试最小化这个值。
这是一个完整的例子。假设这是“可用性”:
<tf.Tensor: shape=(1, 10), dtype=float32,
numpy=array([[0., 0., 0., 0., 1., 0., 0., 0., 1., 1.]], dtype=float32)>
这是 confidences:
<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[0.09586799, 0.03268242, 0.04225421, 0.4026084 , 0.5088273 ,
0.38777208, 0.53815687, 0.41644037, 0.5709661 , 0.7587745 ]],
dtype=float32)>
让我们训练一个 CNN 来根据这个特殊的损失函数对 MNIST 进行分类。
import tensorflow as tf
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()


def unsqueeze(x, y):
    """Prepare one batch: float images with a trailing axis, one-hot labels.

    The images are cast to float32, divided by 255 and given an extra last
    dimension; the labels are one-hot encoded with depth 10.
    """
    scaled = tf.divide(tf.cast(x, tf.float32), 255)
    return tf.expand_dims(scaled, -1), tf.one_hot(y, depth=10)


def _batched_pipeline(images, labels):
    """Build the shuffle -> batch -> map -> prefetch pipeline for one split."""
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    dataset = dataset.shuffle(64)
    dataset = dataset.batch(64)
    # Mapping after batching means `unsqueeze` receives whole batches.
    dataset = dataset.map(unsqueeze)
    return dataset.prefetch(1)


train = _batched_pipeline(xtrain, ytrain)
test = _batched_pipeline(xtest, ytest)
class CNN(tf.keras.Model):
    """Small convolutional classifier that also emits two auxiliary tensors.

    ``call`` returns a 3-tuple ``(logits, availability, confidences)``. Only
    the logits are produced by trainable layers; the other two tensors are
    sampled at random on every call and merely stand in for real
    availability / confidence outputs in this example.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3),
                                            strides=(1, 1),
                                            input_shape=(28, 28, 1))
        self.maxp1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.conv2 = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3),
                                            strides=(1, 1))
        self.maxp2 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.flat1 = tf.keras.layers.Flatten()
        self.dens1 = tf.keras.layers.Dense(64, activation='relu')
        self.drop1 = tf.keras.layers.Dropout(5e-1)
        self.dens3 = tf.keras.layers.Dense(10)

    def call(self, x, training=None, **kwargs):
        """Run the forward pass.

        Args:
            x: Batch of input images.
            training: Forwarded to the dropout layer so that it is only
                active when the caller asks for training behaviour.
            **kwargs: Accepted for signature compatibility; not used.

        Returns:
            A tuple ``(logits, availability, confidences)``. All three have
            the same shape as the logits. ``availability`` holds random 0/1
            values and ``confidences`` holds random floats in ``[0, 1)``.
        """
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.flat1(x)
        x = self.dens1(x)
        x = self.drop1(x, training=training)
        logits = self.dens3(x)
        # Use the dynamic shape instead of len(): len() on a tensor fails when
        # the batch dimension is unknown, e.g. inside a traced graph.
        out_shape = tf.shape(logits)
        availability = tf.cast(
            tf.random.uniform(out_shape, minval=0, maxval=2, dtype=tf.int32),
            tf.float32)
        confidences = tf.random.uniform(out_shape, minval=0, maxval=1,
                                        dtype=tf.float32)
        return logits, availability, confidences
# Cross-entropy that is told (from_logits=True) its predictions are raw logits.
loss_object = tf.losses.CategoricalCrossentropy(from_logits=True)
# The network instance trained by the loop further down.
model = CNN()
def compute_loss(model, x, y, training):
    """Return a single loss value built from all three model outputs.

    The model is called once and is expected to return three tensors. The
    first is scaled element-wise by the third, the second is added on top,
    and the result is scored against ``y`` by the module-level
    ``loss_object``.
    """
    logits, availability, confidence = model(inputs=x, training=training)
    # Same arithmetic as tf.add(tf.multiply(...), ...), written with operators.
    adjusted = logits * confidence + availability
    return loss_object(y_true=y, y_pred=adjusted)
def get_grad(model, x, y, training=True):
    """Compute the loss and its gradients for one batch.

    Args:
        model: Model passed through to ``compute_loss``.
        x: Batch of inputs.
        y: Batch of targets.
        training: Forwarded to ``compute_loss``. Defaults to ``True`` because
            gradients are only needed while training, and layers such as
            dropout must be active for that forward pass.

    Returns:
        A tuple ``(loss, gradients)`` where ``gradients`` lines up with
        ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=training)
    return loss, tape.gradient(loss, model.trainable_variables)


optimizer = tf.optimizers.Adam()
verbose = "Epoch {:2d} Loss: {:.3f} TLoss: {:.3f} Acc: {:.2%} TAcc: {:.2%}"

for epoch in range(1, 10 + 1):
    # Fresh metric objects each epoch so the printed values are per-epoch.
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()

    for x, y in train:
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        # The model returns (logits, availability, confidences); accuracy
        # must be measured on the logits only, not on the whole tuple.
        logits, _, _ = model(x, training=True)
        train_acc.update_state(y, logits)

    for x, y in test:
        # Evaluation only needs the loss value, so no gradient tape is used.
        test_loss.update_state(compute_loss(model, x, y, training=False))
        logits, _, _ = model(x, training=False)
        test_acc.update_state(y, logits)

    # Convert the metric tensors to plain floats before formatting.
    print(verbose.format(epoch,
                         float(train_loss.result()),
                         float(test_loss.result()),
                         float(train_acc.result()),
                         float(test_acc.result())))
我有一个模型,它必须返回坐标,同时还必须返回对这些坐标的置信度。我的损失函数必须同时考虑目标坐标和目标可用性。我的损失函数大致如下:
def loss(targets, target_availabilities, preds, confidences):
    # Placeholder from the question: the real loss is meant to combine the
    # target coordinates, their availabilities, the predicted coordinates and
    # the predicted confidences into a single value.
    # NOTE(review): no local named `loss` is assigned here, so as written the
    # `return` hands back the function object itself; this is a sketch only.
    # my loss function goes here
    return loss
TensorFlow 的函数式 API 文档展示了如何让 2 个不同的输出分别经过各自的损失函数(或者经过同一个损失函数并返回 2 个损失值,每个损失值对应一对 y_true 和 y_pred)。我应该如何编译和拟合我的模型,使得单个损失函数能够同时接收目标(targets)、target_availabilities、预测值和置信度?
我建议使用自定义训练循环来实现这一点。它允许更大的灵活性。只要你 return 一个值,你就可以在你的损失函数中执行任何类型的计算。假设您想这样做:
transformed_output = (y_pred * confidence) + availability
您可以在自定义损失函数中实现这一点(假设您的神经网络架构 returns 这三个值):
def compute_loss(model, x, y, training):
    """Return a single loss value built from all three model outputs.

    The model is called once and is expected to return three tensors. The
    first is scaled element-wise by the third, the second is added on top,
    and the result is scored against ``y`` by the module-level
    ``loss_object``.
    """
    logits, availability, confidence = model(inputs=x, training=training)
    # Same arithmetic as tf.add(tf.multiply(...), ...), written with operators.
    adjusted = logits * confidence + availability
    return loss_object(y_true=y, y_pred=adjusted)
这将 return 一个值,无论它是什么,Tensorflow 都会尝试最小化这个值。
这是一个完整的例子。假设这是“可用性”:
<tf.Tensor: shape=(1, 10), dtype=float32,
numpy=array([[0., 0., 0., 0., 1., 0., 0., 0., 1., 1.]], dtype=float32)>
这是 confidences:
<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[0.09586799, 0.03268242, 0.04225421, 0.4026084 , 0.5088273 ,
0.38777208, 0.53815687, 0.41644037, 0.5709661 , 0.7587745 ]],
dtype=float32)>
让我们训练一个 CNN 来根据这个特殊的损失函数对 MNIST 进行分类。
import tensorflow as tf
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()


def unsqueeze(x, y):
    """Prepare one batch: float images with a trailing axis, one-hot labels.

    The images are cast to float32, divided by 255 and given an extra last
    dimension; the labels are one-hot encoded with depth 10.
    """
    scaled = tf.divide(tf.cast(x, tf.float32), 255)
    return tf.expand_dims(scaled, -1), tf.one_hot(y, depth=10)


def _batched_pipeline(images, labels):
    """Build the shuffle -> batch -> map -> prefetch pipeline for one split."""
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    dataset = dataset.shuffle(64)
    dataset = dataset.batch(64)
    # Mapping after batching means `unsqueeze` receives whole batches.
    dataset = dataset.map(unsqueeze)
    return dataset.prefetch(1)


train = _batched_pipeline(xtrain, ytrain)
test = _batched_pipeline(xtest, ytest)
class CNN(tf.keras.Model):
    """Small convolutional classifier that also emits two auxiliary tensors.

    ``call`` returns a 3-tuple ``(logits, availability, confidences)``. Only
    the logits are produced by trainable layers; the other two tensors are
    sampled at random on every call and merely stand in for real
    availability / confidence outputs in this example.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3),
                                            strides=(1, 1),
                                            input_shape=(28, 28, 1))
        self.maxp1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.conv2 = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3),
                                            strides=(1, 1))
        self.maxp2 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.flat1 = tf.keras.layers.Flatten()
        self.dens1 = tf.keras.layers.Dense(64, activation='relu')
        self.drop1 = tf.keras.layers.Dropout(5e-1)
        self.dens3 = tf.keras.layers.Dense(10)

    def call(self, x, training=None, **kwargs):
        """Run the forward pass.

        Args:
            x: Batch of input images.
            training: Forwarded to the dropout layer so that it is only
                active when the caller asks for training behaviour.
            **kwargs: Accepted for signature compatibility; not used.

        Returns:
            A tuple ``(logits, availability, confidences)``. All three have
            the same shape as the logits. ``availability`` holds random 0/1
            values and ``confidences`` holds random floats in ``[0, 1)``.
        """
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.flat1(x)
        x = self.dens1(x)
        x = self.drop1(x, training=training)
        logits = self.dens3(x)
        # Use the dynamic shape instead of len(): len() on a tensor fails when
        # the batch dimension is unknown, e.g. inside a traced graph.
        out_shape = tf.shape(logits)
        availability = tf.cast(
            tf.random.uniform(out_shape, minval=0, maxval=2, dtype=tf.int32),
            tf.float32)
        confidences = tf.random.uniform(out_shape, minval=0, maxval=1,
                                        dtype=tf.float32)
        return logits, availability, confidences
# Cross-entropy that is told (from_logits=True) its predictions are raw logits.
loss_object = tf.losses.CategoricalCrossentropy(from_logits=True)
# The network instance trained by the loop further down.
model = CNN()
def compute_loss(model, x, y, training):
    """Return a single loss value built from all three model outputs.

    The model is called once and is expected to return three tensors. The
    first is scaled element-wise by the third, the second is added on top,
    and the result is scored against ``y`` by the module-level
    ``loss_object``.
    """
    logits, availability, confidence = model(inputs=x, training=training)
    # Same arithmetic as tf.add(tf.multiply(...), ...), written with operators.
    adjusted = logits * confidence + availability
    return loss_object(y_true=y, y_pred=adjusted)
def get_grad(model, x, y, training=True):
    """Compute the loss and its gradients for one batch.

    Args:
        model: Model passed through to ``compute_loss``.
        x: Batch of inputs.
        y: Batch of targets.
        training: Forwarded to ``compute_loss``. Defaults to ``True`` because
            gradients are only needed while training, and layers such as
            dropout must be active for that forward pass.

    Returns:
        A tuple ``(loss, gradients)`` where ``gradients`` lines up with
        ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=training)
    return loss, tape.gradient(loss, model.trainable_variables)


optimizer = tf.optimizers.Adam()
verbose = "Epoch {:2d} Loss: {:.3f} TLoss: {:.3f} Acc: {:.2%} TAcc: {:.2%}"

for epoch in range(1, 10 + 1):
    # Fresh metric objects each epoch so the printed values are per-epoch.
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()

    for x, y in train:
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        # The model returns (logits, availability, confidences); accuracy
        # must be measured on the logits only, not on the whole tuple.
        logits, _, _ = model(x, training=True)
        train_acc.update_state(y, logits)

    for x, y in test:
        # Evaluation only needs the loss value, so no gradient tape is used.
        test_loss.update_state(compute_loss(model, x, y, training=False))
        logits, _, _ = model(x, training=False)
        test_acc.update_state(y, logits)

    # Convert the metric tensors to plain floats before formatting.
    print(verbose.format(epoch,
                         float(train_loss.result()),
                         float(test_loss.result()),
                         float(train_acc.result()),
                         float(test_acc.result())))