EarlyStopping based on convergence of a trainable variable in TF/Keras
Say I have a custom layer that computes the loss for me, using external trainable variables, in TF 2.4 (and yes, I know this is a silly example and loss, it is just for reproducibility; the actual loss is much more complex):
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Layer, Input
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
n_col = 10
n_row = 1000
X = np.random.normal(size=(n_row, n_col))
beta = np.arange(10)
y = X @ beta
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
class MyLoss(Layer):
    def __init__(self, var1, var2):
        super(MyLoss, self).__init__()
        self.var1 = tf.Variable(var1)
        self.var2 = tf.Variable(var2)

    def get_vars(self):
        return self.var1, self.var2

    def custom_loss(self, y_true, y_pred):
        return self.var1 ** 2 * tf.math.reduce_mean(tf.math.square(y_true - y_pred)) + self.var2 ** 2

    def call(self, y_true, y_pred):
        self.add_loss(self.custom_loss(y_true, y_pred))
        return y_pred
inputs = Input(shape=(X_train.shape[1],))
y_input = Input(shape=(1,))
hidden1 = Dense(10)(inputs)
output = Dense(1)(hidden1)
my_loss = MyLoss(0.5, 0.5)(y_input, output)  # var1 and var2 can also be initialized differently here
model = Model(inputs=[inputs, y_input], outputs=my_loss)
model.compile(optimizer='adam')
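Note that compile() receives no loss argument here: the loss is attached inside the layer via add_loss(), and Keras minimizes the sum of the model's collected losses.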
Training this model is straightforward:
history = model.fit([X_train, y_train], None,
                    batch_size=32, epochs=100, validation_split=0.1, verbose=0,
                    callbacks=[EarlyStopping(monitor='val_loss', patience=5)])
If we write a custom callback, or simply train epoch by epoch, we can see how var1 and var2 converge to 0, as expected:
var1_list = []
var2_list = []

for i in range(100):
    if i % 10 == 0:
        print('step %d' % i)
    model.fit([X_train, y_train], None,
              batch_size=32, epochs=1, validation_split=0.1, verbose=0)
    var1, var2 = model.layers[-1].get_vars()
    var1_list.append(var1.numpy())
    var2_list.append(var2.numpy())

plt.plot(var1_list, label='var1')
plt.plot(var2_list, 'r', label='var2')
plt.legend()
plt.show()
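(Successive fit() calls continue from the current weights and optimizer state, so this loop is effectively equivalent to a single 100-epoch run, just with the variables sampled after every epoch.)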
Shorter question: how do you make EarlyStopping depend on the convergence of var1 and var2 (i.e. on their vector's magnitude, self.var1**2 + self.var2**2; and again, assume the loss is much more complex, so you cannot just add this magnitude to the loss)?
Longer questions (if you have the time/patience):

- Can this be achieved by implementing a custom Metric and having EarlyStopping track it? (A sketch of one possible approach follows this list.)
- In that case, how do you make EarlyStopping focus on "convergence", when its only modes are "min" and "max"? (I wonder whether we could extend EarlyStopping, rather than extending Callback.)
- Can we do this with a custom callback, without a metric?
- How do we combine this with the custom loss above, i.e. tell EarlyStopping to pay attention to both: "stop if you see no improvement in the loss and no improvement in convergence, with patience=10"? (See the sketch at the end of this post.)
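Not from the original post, but here is one way the first and third bullets could be sketched: instead of a full custom Metric, a small callback can publish the norm into the epoch logs as a pseudo-metric, which a standard EarlyStopping placed after it in the callbacks list can then monitor by name (this relies on TF 2.x passing the same logs dict to each callback in list order):

class LogVarsNorm(tf.keras.callbacks.Callback):
    # Writes the current norm of the loss layer's variables into the
    # epoch logs, so it shows up in history.history and can be monitored
    # by any callback placed after this one in the callbacks list.
    def on_epoch_end(self, epoch, logs=None):
        var1, var2 = self.model.layers[-1].get_vars()
        if logs is not None:
            logs['vars_norm'] = float(var1 ** 2 + var2 ** 2)

history = model.fit([X_train, y_train], None,
                    batch_size=32, epochs=100, validation_split=0.1, verbose=0,
                    callbacks=[LogVarsNorm(),
                               EarlyStopping(monitor='vars_norm', mode='min', patience=5)])

This only works here because the norm happens to decrease towards 0, so mode='min' is adequate; it still does not express "convergence" as such, which is exactly the limitation the second bullet raises.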
Well, at least for the "shorter question", this turned out to be quite straightforward: following this example from the TF docs, implement early stopping that focuses on the variables' norm:
class EarlyStoppingAtVarsConvergence(tf.keras.callbacks.Callback):
    def __init__(self, norm_thresh=0.01, patience=0):
        super(EarlyStoppingAtVarsConvergence, self).__init__()
        self.norm_thresh = norm_thresh
        self.patience = patience

    def on_train_begin(self, logs=None):
        # The number of epochs waited while the norm hasn't converged.
        self.wait = 0
        # The epoch the training stops at.
        self.stopped_epoch = 0
        # Initialize the variables' norm.
        self.vars_norm = self.get_vars_norm()

    def get_vars_norm(self):
        var1, var2 = self.model.layers[-1].get_vars()
        return float(var1 ** 2 + var2 ** 2)

    def on_epoch_end(self, epoch, logs=None):
        current_norm = self.get_vars_norm()
        if np.abs(current_norm - self.vars_norm) > self.norm_thresh:
            # The norm is still moving: record it and reset the wait counter.
            self.vars_norm = current_norm
            self.wait = 0
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.stopped_epoch = epoch
                self.model.stop_training = True

    def on_train_end(self, logs=None):
        if self.stopped_epoch > 0:
            print("Epoch %05d: early stopping" % (self.stopped_epoch + 1))
Then the model would be run with:
history = model.fit([X_train, y_train], None,
                    batch_size=32, epochs=100, validation_split=0.1, verbose=0,
                    callbacks=[EarlyStoppingAtVarsConvergence(patience=5)])
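As for the last of the longer questions, the same pattern extends naturally. A minimal sketch (my own, not from the original post): reset the patience counter whenever either the validation loss improves or the norm is still moving, and stop only once both have stalled for patience epochs:

class EarlyStoppingCombined(tf.keras.callbacks.Callback):
    def __init__(self, norm_thresh=0.01, min_delta=0.0, patience=0):
        super(EarlyStoppingCombined, self).__init__()
        self.norm_thresh = norm_thresh
        self.min_delta = min_delta
        self.patience = patience

    def on_train_begin(self, logs=None):
        self.wait = 0
        self.best_loss = np.inf
        var1, var2 = self.model.layers[-1].get_vars()
        self.vars_norm = float(var1 ** 2 + var2 ** 2)

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        var1, var2 = self.model.layers[-1].get_vars()
        current_norm = float(var1 ** 2 + var2 ** 2)
        current_loss = logs.get('val_loss', np.inf)
        loss_improved = current_loss < self.best_loss - self.min_delta
        norm_moving = np.abs(current_norm - self.vars_norm) > self.norm_thresh
        if loss_improved:
            self.best_loss = current_loss
        if norm_moving:
            self.vars_norm = current_norm
        if loss_improved or norm_moving:
            self.wait = 0  # either criterion still improving: keep training
        else:
            self.wait += 1  # both have stalled
            if self.wait >= self.patience:
                self.model.stop_training = True

history = model.fit([X_train, y_train], None,
                    batch_size=32, epochs=100, validation_split=0.1, verbose=0,
                    callbacks=[EarlyStoppingCombined(patience=10)])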