ValueError: tf.function-decorated function tried to create variables on non-first call while using Custom Loss Function
ValueError: tf.function-decorated function tried to create variables on non-first call while using Custom Loss Function
我正在尝试创建一个三元组损失函数来计算两个句子之间的相似度,如下所示:
def TripletLoss(y_true, y_pred, margin=0.25, batch_size=64, embedding_dim=128):
    """Triplet loss over a batch of paired sentence embeddings.

    Each row of ``y_pred`` carries two embeddings: the first
    ``embedding_dim`` columns and the last ``embedding_dim`` columns.
    Row i's own pair is the positive; the pairings of row i with every
    other row of the batch act as negatives.

    Args:
        y_true: Not read; present because Keras calls ``loss(y_true, y_pred)``.
        y_pred: Tensor whose rows hold the two concatenated embeddings.
        margin: Required gap between the positive and negative scores.
        batch_size: Kept for backward compatibility and no longer read; the
            number of rows is taken from the tensor itself so a shorter
            final batch does not cause a shape mismatch.
        embedding_dim: Width of each of the two embeddings inside ``y_pred``.

    Returns:
        Scalar tensor: the batch mean of the "closest negative" hinge term
        plus the "mean negative" hinge term.
    """
    v1, v2 = y_pred[:, :embedding_dim], y_pred[:, -embedding_dim:]
    scores = K.dot(v1, K.transpose(v2))
    positive = tf.linalg.diag_part(scores)

    # tf.eye returns a plain tensor. K.eye wraps its result in a variable, and
    # creating a variable inside the traced training step is what raises
    # "tf.function-decorated function tried to create variables on non-first
    # call" during fit().
    rows = tf.shape(scores)[0]
    identity = tf.eye(rows, dtype=scores.dtype)

    # Subtracting 2 on the diagonal keeps the positive from being picked as
    # the closest negative (assumes scores lie in [-1, 1] -- TODO confirm).
    closest_negative = tf.reduce_max(scores - 2.0 * identity, axis=1)

    # Zero the diagonal, then average over the other rows. The divisor is
    # clamped to 1 so a single-row batch yields 0 instead of 0/0 = NaN.
    negative_count = tf.cast(K.maximum(rows - 1, 1), scores.dtype)
    mean_negative = K.sum(scores * (1.0 - identity), axis=1) / negative_count

    triplet_loss1 = K.maximum(0.0, margin - positive + closest_negative)
    triplet_loss2 = K.maximum(0.0, margin - positive + mean_negative)
    return K.mean(triplet_loss1 + triplet_loss2)
我的模型如下:
# Two inputs of identical length (the second dimension of train_data1),
# both encoded by the same base_model so the weights are shared.
input1 = keras.Input(shape=(train_data1.shape[1],))
input2 = keras.Input(shape=(train_data1.shape[1],))
encoding1, encoding2 = base_model(input1), base_model(input2)

# The two encodings are concatenated into one output tensor; the loss
# slices them apart again.
merged = layers.Concatenate()([encoding1, encoding2])
model = models.Model(inputs=[input1, input2], outputs=merged)
基础模型是:
def calculate_mean(x, axis=1):
    """Return the Keras-backend mean of ``x`` along ``axis``.

    Used as a ``Lambda`` layer directly after the LSTM that returns full
    sequences; with the default ``axis=1`` this presumably averages over
    the time-step axis -- confirm against the LSTM output layout.
    """
    averaged = K.mean(x, axis=axis)
    return averaged
def normalize(x):
    """Scale ``x`` to unit L2 norm along its last axis.

    The norm is clamped from below by ``K.epsilon()`` so that an all-zero
    vector maps to zeros instead of NaN (0 / 0), which would otherwise
    poison the loss and every gradient behind it.
    """
    norm = K.sqrt(K.sum(x * x, axis=-1, keepdims=True))
    return x / K.maximum(norm, K.epsilon())
# Shared sentence encoder: embedding -> LSTM returning every step ->
# mean over axis 1 -> L2 normalisation. Layers are listed in the same
# order they were previously added one by one.
base_model = models.Sequential([
    layers.Embedding(input_dim=len(vocab) + 2, output_dim=128),
    layers.LSTM(128, return_sequences=True),
    layers.Lambda(calculate_mean, name='mean'),
    layers.Lambda(normalize, name='normalize'),
])
现在,当我使用那个损失函数编译模型时
# Adam with a 0.001 learning rate; the custom TripletLoss function is
# handed to Keras directly as the loss callable.
model.compile(optimizer=Adam(0.001), loss=TripletLoss)
它没有给出任何错误。但是当我使用 fit 方法训练它时,它给我的错误是:
ValueError: tf.function-decorated function tried to create variables on non-first call.
如果我使用其他损失函数,效果会很好。不知道这里的损失函数有什么问题
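问题出在 K.eye:它会把单位矩阵包装成一个 Keras 变量,而 tf.function 只允许在第一次调用(追踪)时创建变量,所以在 fit 训练时会抛出上面的 ValueError。可以改用 tf.eye,它直接返回普通张量,不会创建变量。
尝试以下操作: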
import tensorflow as tf
from tensorflow.keras import backend as K
def TripletLoss(margin=0.25, embedding_dim=128):
    """Build a Keras-compatible triplet loss for paired sentence embeddings.

    Args:
        margin: Required gap between the positive and negative scores.
        embedding_dim: Width of each of the two embeddings concatenated in
            ``y_pred`` (first ``embedding_dim`` and last ``embedding_dim``
            columns).

    Returns:
        A function ``triplet(y_true, y_pred)`` returning a scalar loss tensor.
    """
    def triplet(y_true, y_pred):
        """Batch mean of the closest-negative and mean-negative hinge terms.

        ``y_true`` is not read; the batch size comes from the score matrix.
        """
        v1, v2 = y_pred[:, :embedding_dim], y_pred[:, -embedding_dim:]
        scores = K.dot(v1, K.transpose(v2))
        positive = tf.linalg.diag_part(scores)

        # tf.eye takes an integer row count, so the batch size stays an int
        # here and is cast to the score dtype only for the division below.
        # The identity is created in the score dtype to avoid a float32 /
        # non-float32 mismatch.
        rows = tf.shape(scores)[0]
        identity = tf.eye(rows, dtype=scores.dtype)

        # Subtracting 2 on the diagonal keeps the positive from being picked
        # as the closest negative (assumes scores in [-1, 1] -- TODO confirm).
        closest_negative = tf.reduce_max(scores - 2.0 * identity, axis=1)

        # Zero the diagonal, then average over the other rows. Clamping the
        # divisor to 1 makes a single-row batch yield 0 instead of NaN.
        negative_count = tf.cast(tf.maximum(rows - 1, 1), scores.dtype)
        mean_negative = K.sum(scores * (1.0 - identity), axis=1) / negative_count

        triplet_loss1 = K.maximum(0.0, margin - positive + closest_negative)
        triplet_loss2 = K.maximum(0.0, margin - positive + mean_negative)
        return K.mean(triplet_loss1 + triplet_loss2)

    return triplet


# Loss callable with the default margin, passed to model.compile.
triplet_loss = TripletLoss()
def calculate_mean(x, axis=1):
    """Return the Keras-backend mean of ``x`` along ``axis``.

    Used as a ``Lambda`` layer directly after the LSTM that returns full
    sequences; with the default ``axis=1`` this presumably averages over
    the time-step axis -- confirm against the LSTM output layout.
    """
    averaged = K.mean(x, axis=axis)
    return averaged
def normalize(x):
    """Scale ``x`` to unit L2 norm along its last axis.

    The norm is clamped from below by ``K.epsilon()`` so that an all-zero
    vector maps to zeros instead of NaN (0 / 0), which would otherwise
    poison the loss and every gradient behind it.
    """
    norm = K.sqrt(K.sum(x * x, axis=-1, keepdims=True))
    return x / K.maximum(norm, K.epsilon())
# Shared sentence encoder: embedding -> LSTM returning every step ->
# mean over axis 1 -> L2 normalisation.
base_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=50, output_dim=128),
    tf.keras.layers.LSTM(128, return_sequences=True),
    tf.keras.layers.Lambda(calculate_mean, name='mean'),
    tf.keras.layers.Lambda(normalize, name='normalize'),
])

# Two length-50 inputs go through the same encoder; the two 128-wide
# encodings are concatenated into a single 256-wide output.
input1 = tf.keras.layers.Input(shape=(50,))
input2 = tf.keras.layers.Input(shape=(50,))
encoding1, encoding2 = base_model(input1), base_model(input2)
merged = tf.keras.layers.Concatenate()([encoding1, encoding2])
model = tf.keras.Model(inputs=[input1, input2], outputs=merged)

model.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss=triplet_loss)

# Random smoke-test data: 500 integer sequences with token ids below 50,
# and random targets matching the 256-wide model output. 500 is not a
# multiple of 64, so the last batch is shorter than the rest.
x = tf.random.uniform((500, 50), maxval=50, dtype=tf.int32)
y = tf.random.uniform((500, 256))
model.fit([x, x], y, epochs=2, batch_size=64)
Epoch 1/2
8/8 [==============================] - 6s 237ms/step - loss: 0.0037
Epoch 2/2
8/8 [==============================] - 2s 233ms/step - loss: 5.4691e-04
<keras.callbacks.History at 0x7fd249072d50>
我正在尝试创建一个三元组损失函数来计算两个句子之间的相似度,如下所示:
def TripletLoss(y_true, y_pred, margin=0.25, batch_size=64, embedding_dim=128):
    """Triplet loss over a batch of paired sentence embeddings.

    Each row of ``y_pred`` carries two embeddings: the first
    ``embedding_dim`` columns and the last ``embedding_dim`` columns.
    Row i's own pair is the positive; the pairings of row i with every
    other row of the batch act as negatives.

    Args:
        y_true: Not read; present because Keras calls ``loss(y_true, y_pred)``.
        y_pred: Tensor whose rows hold the two concatenated embeddings.
        margin: Required gap between the positive and negative scores.
        batch_size: Kept for backward compatibility and no longer read; the
            number of rows is taken from the tensor itself so a shorter
            final batch does not cause a shape mismatch.
        embedding_dim: Width of each of the two embeddings inside ``y_pred``.

    Returns:
        Scalar tensor: the batch mean of the "closest negative" hinge term
        plus the "mean negative" hinge term.
    """
    v1, v2 = y_pred[:, :embedding_dim], y_pred[:, -embedding_dim:]
    scores = K.dot(v1, K.transpose(v2))
    positive = tf.linalg.diag_part(scores)

    # tf.eye returns a plain tensor. K.eye wraps its result in a variable, and
    # creating a variable inside the traced training step is what raises
    # "tf.function-decorated function tried to create variables on non-first
    # call" during fit().
    rows = tf.shape(scores)[0]
    identity = tf.eye(rows, dtype=scores.dtype)

    # Subtracting 2 on the diagonal keeps the positive from being picked as
    # the closest negative (assumes scores lie in [-1, 1] -- TODO confirm).
    closest_negative = tf.reduce_max(scores - 2.0 * identity, axis=1)

    # Zero the diagonal, then average over the other rows. The divisor is
    # clamped to 1 so a single-row batch yields 0 instead of 0/0 = NaN.
    negative_count = tf.cast(K.maximum(rows - 1, 1), scores.dtype)
    mean_negative = K.sum(scores * (1.0 - identity), axis=1) / negative_count

    triplet_loss1 = K.maximum(0.0, margin - positive + closest_negative)
    triplet_loss2 = K.maximum(0.0, margin - positive + mean_negative)
    return K.mean(triplet_loss1 + triplet_loss2)
我的模型如下:
# Two inputs of identical length (the second dimension of train_data1),
# both encoded by the same base_model so the weights are shared.
input1 = keras.Input(shape=(train_data1.shape[1],))
input2 = keras.Input(shape=(train_data1.shape[1],))
encoding1, encoding2 = base_model(input1), base_model(input2)

# The two encodings are concatenated into one output tensor; the loss
# slices them apart again.
merged = layers.Concatenate()([encoding1, encoding2])
model = models.Model(inputs=[input1, input2], outputs=merged)
基础模型是:
def calculate_mean(x, axis=1):
    """Return the Keras-backend mean of ``x`` along ``axis``.

    Used as a ``Lambda`` layer directly after the LSTM that returns full
    sequences; with the default ``axis=1`` this presumably averages over
    the time-step axis -- confirm against the LSTM output layout.
    """
    averaged = K.mean(x, axis=axis)
    return averaged
def normalize(x):
    """Scale ``x`` to unit L2 norm along its last axis.

    The norm is clamped from below by ``K.epsilon()`` so that an all-zero
    vector maps to zeros instead of NaN (0 / 0), which would otherwise
    poison the loss and every gradient behind it.
    """
    norm = K.sqrt(K.sum(x * x, axis=-1, keepdims=True))
    return x / K.maximum(norm, K.epsilon())
# Shared sentence encoder: embedding -> LSTM returning every step ->
# mean over axis 1 -> L2 normalisation. Layers are listed in the same
# order they were previously added one by one.
base_model = models.Sequential([
    layers.Embedding(input_dim=len(vocab) + 2, output_dim=128),
    layers.LSTM(128, return_sequences=True),
    layers.Lambda(calculate_mean, name='mean'),
    layers.Lambda(normalize, name='normalize'),
])
现在,当我使用那个损失函数编译模型时
# Adam with a 0.001 learning rate; the custom TripletLoss function is
# handed to Keras directly as the loss callable.
model.compile(optimizer=Adam(0.001), loss=TripletLoss)
它没有给出任何错误。但是当我使用 fit 方法训练它时,它给我的错误是:
ValueError: tf.function-decorated function tried to create variables on non-first call.
如果我使用其他损失函数,效果会很好。不知道这里的损失函数有什么问题
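问题出在 K.eye:它会把单位矩阵包装成一个 Keras 变量,而 tf.function 只允许在第一次调用(追踪)时创建变量,所以在 fit 训练时会抛出上面的 ValueError。可以改用 tf.eye,它直接返回普通张量,不会创建变量。
尝试以下操作: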
import tensorflow as tf
from tensorflow.keras import backend as K
def TripletLoss(margin=0.25, embedding_dim=128):
    """Build a Keras-compatible triplet loss for paired sentence embeddings.

    Args:
        margin: Required gap between the positive and negative scores.
        embedding_dim: Width of each of the two embeddings concatenated in
            ``y_pred`` (first ``embedding_dim`` and last ``embedding_dim``
            columns).

    Returns:
        A function ``triplet(y_true, y_pred)`` returning a scalar loss tensor.
    """
    def triplet(y_true, y_pred):
        """Batch mean of the closest-negative and mean-negative hinge terms.

        ``y_true`` is not read; the batch size comes from the score matrix.
        """
        v1, v2 = y_pred[:, :embedding_dim], y_pred[:, -embedding_dim:]
        scores = K.dot(v1, K.transpose(v2))
        positive = tf.linalg.diag_part(scores)

        # tf.eye takes an integer row count, so the batch size stays an int
        # here and is cast to the score dtype only for the division below.
        # The identity is created in the score dtype to avoid a float32 /
        # non-float32 mismatch.
        rows = tf.shape(scores)[0]
        identity = tf.eye(rows, dtype=scores.dtype)

        # Subtracting 2 on the diagonal keeps the positive from being picked
        # as the closest negative (assumes scores in [-1, 1] -- TODO confirm).
        closest_negative = tf.reduce_max(scores - 2.0 * identity, axis=1)

        # Zero the diagonal, then average over the other rows. Clamping the
        # divisor to 1 makes a single-row batch yield 0 instead of NaN.
        negative_count = tf.cast(tf.maximum(rows - 1, 1), scores.dtype)
        mean_negative = K.sum(scores * (1.0 - identity), axis=1) / negative_count

        triplet_loss1 = K.maximum(0.0, margin - positive + closest_negative)
        triplet_loss2 = K.maximum(0.0, margin - positive + mean_negative)
        return K.mean(triplet_loss1 + triplet_loss2)

    return triplet


# Loss callable with the default margin, passed to model.compile.
triplet_loss = TripletLoss()
def calculate_mean(x, axis=1):
    """Return the Keras-backend mean of ``x`` along ``axis``.

    Used as a ``Lambda`` layer directly after the LSTM that returns full
    sequences; with the default ``axis=1`` this presumably averages over
    the time-step axis -- confirm against the LSTM output layout.
    """
    averaged = K.mean(x, axis=axis)
    return averaged
def normalize(x):
    """Scale ``x`` to unit L2 norm along its last axis.

    The norm is clamped from below by ``K.epsilon()`` so that an all-zero
    vector maps to zeros instead of NaN (0 / 0), which would otherwise
    poison the loss and every gradient behind it.
    """
    norm = K.sqrt(K.sum(x * x, axis=-1, keepdims=True))
    return x / K.maximum(norm, K.epsilon())
# Shared sentence encoder: embedding -> LSTM returning every step ->
# mean over axis 1 -> L2 normalisation.
base_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=50, output_dim=128),
    tf.keras.layers.LSTM(128, return_sequences=True),
    tf.keras.layers.Lambda(calculate_mean, name='mean'),
    tf.keras.layers.Lambda(normalize, name='normalize'),
])

# Two length-50 inputs go through the same encoder; the two 128-wide
# encodings are concatenated into a single 256-wide output.
input1 = tf.keras.layers.Input(shape=(50,))
input2 = tf.keras.layers.Input(shape=(50,))
encoding1, encoding2 = base_model(input1), base_model(input2)
merged = tf.keras.layers.Concatenate()([encoding1, encoding2])
model = tf.keras.Model(inputs=[input1, input2], outputs=merged)

model.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss=triplet_loss)

# Random smoke-test data: 500 integer sequences with token ids below 50,
# and random targets matching the 256-wide model output. 500 is not a
# multiple of 64, so the last batch is shorter than the rest.
x = tf.random.uniform((500, 50), maxval=50, dtype=tf.int32)
y = tf.random.uniform((500, 256))
model.fit([x, x], y, epochs=2, batch_size=64)
Epoch 1/2
8/8 [==============================] - 6s 237ms/step - loss: 0.0037
Epoch 2/2
8/8 [==============================] - 2s 233ms/step - loss: 5.4691e-04
<keras.callbacks.History at 0x7fd249072d50>