Keras 使用掩码冻结特定权重
Keras freeze specific weights with mask
我是 Keras 的新手。我想实现一个并非所有权重都会更新的层。例如,在下面的代码中,我希望 dilation 层在训练时,某些中心权重永远不会被更新。具体来说,dilation 层中每个特征矩阵(共 1024 个)的形状为 448×448,而所有特征矩阵中心的 8×8 块永远不会更新,即这个 8×8 块是特征矩阵的(不可训练的)掩码。
# Keyword arguments shared by every convolution in this stack.
conv_kwargs = dict(padding="same", activation="relu", kernel_initializer="he_normal")

input_layer = Input(shape=(896, 896, 3))
new_layer = Conv2D(32, kernel_size=(3, 3), **conv_kwargs)(input_layer)
# 2x2 max-pooling with stride 2 halves the spatial size: 896 -> 448.
new_layer = MaxPooling2D(
    pool_size=(2, 2), strides=(2, 2), padding="same", data_format=None
)(new_layer)
new_layer = Conv2D(64, kernel_size=(3, 3), **conv_kwargs)(new_layer)
# Dilated 7x7 convolution that produces the 1024 feature maps.
new_layer = Conv2D(
    1024, kernel_size=(7, 7), dilation_rate=8, name="dialation", **conv_kwargs
)(new_layer)
# Two 1x1 convolutions bring the channel count down to 32.
new_layer = Conv2D(32, kernel_size=(1, 1), **conv_kwargs)(new_layer)
new_layer = Conv2D(32, kernel_size=(1, 1), **conv_kwargs)(new_layer)

model = Model(input_layer, new_layer)
我正在尝试使用 Keras 的自定义层(custom layer)[link],但我很难理解。有人能帮帮我吗?
更新:
为了更好地理解,我添加了下图。膨胀层包含 1024 个特征。我希望每个特征的中间区域不可训练(静态)。
两种情况都使用此掩码:
# Mask with singleton batch and channel axes so it broadcasts against
# feature maps of shape (batch, 448, 448, channels).
mask = np.zeros(shape=(1, 448, 448, 1))
# Flag the 8x8 window covering rows/columns 220..227 (the centre of the map).
mask[0, 220:228, 220:228, 0] = 1
替换部分特征
如果你用常量值替换部分特征,这部分特征就是静态的,但它仍然会参与反向传播(因为权重仍然会与图像的这一部分相乘并求和,连接依然存在)。
# Value written into the masked region. Zero cancels the kernel's
# contribution there, but not the bias.
constant = 0


def replace(x):
    """Overwrite the masked region of ``x`` with ``constant``.

    Reads the module-level ``mask`` (1 inside the region to replace, 0
    elsewhere), which must broadcast against ``x``.

    Args:
        x: Feature tensor entering the layer.

    Returns:
        ``x`` outside the mask and ``constant`` inside it.
    """
    return x * (1 - mask) + constant * mask


# Insert before the dilation layer.
new_layer = Lambda(replace)(new_layer)
保持特征值,但停止反向传播
这里膨胀层(dilation layer)及其之后各层的权重会正常更新,但膨胀层之前的权重不会受到中心区域的影响。
def stopBackprop(x):
    """Pass ``x`` through unchanged while blocking gradients in the masked region.

    Reads the module-level ``mask`` (1 where gradients must be blocked, 0
    elsewhere), which must broadcast against ``x``. ``K`` is presumably the
    Keras backend module (``from keras import backend as K``) -- confirm
    against the surrounding imports.

    Args:
        x: Feature tensor entering the layer.

    Returns:
        A tensor with the same values as ``x``; gradients flow back only
        through the unmasked positions.
    """
    # The backend function is `stop_gradient` (singular); `stop_gradients`
    # does not exist and raises AttributeError.
    stopped = K.stop_gradient(x)
    return x * (1 - mask) + stopped * mask


# Insert before the dilation layer.
new_layer = Lambda(stopBackprop)(new_layer)
我正在研究权重聚类:先对权重进行聚类,然后冻结特定的簇,再训练网络。
我正在尝试使用上述示例冻结此网络中的特定权重,但我不确定如何在 run_certain_weights() 中设置掩码和自定义层的形状。
这是我正在使用的代码:
from keras.layers import Dense, Flatten, Lambda
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.datasets import mnist
from keras.losses import categorical_crossentropy
from keras.backend import stop_gradient
import numpy as np
def load_data():
    """Load MNIST and prepare it for a softmax classifier.

    Images are converted to float32 and divided by 255; labels are one-hot
    encoded over 10 classes.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    # Cast first so the division is carried out in float32.
    train_images = train_images.astype('float32') / 255
    test_images = test_images.astype('float32') / 255

    train_targets = to_categorical(train_labels, num_classes=10)
    test_targets = to_categorical(test_labels, num_classes=10)

    return train_images, train_targets, test_images, test_targets
def run():
    """Train a three-layer dense MNIST classifier with ``dense2`` frozen.

    The network is Flatten -> Dense(300) -> Dense(100, frozen) -> Dense(10).
    It is trained for 5 epochs and evaluated on the test split; the summary,
    the training-set shapes and the evaluation result are printed.

    Returns:
        The fitted ``Sequential`` model.
    """
    x_train, y_train, x_test, y_test = load_data()

    # Sequential expects a list of layers, not a bare layer instance.
    model = Sequential([Flatten(input_shape=(28, 28))])

    layer = Dense(300, name='dense1', activation='relu')
    layer.trainable = True
    model.add(layer)

    layer2 = Dense(100, name='dense2', activation='relu')
    layer2.trainable = False
    model.add(layer2)

    layer3 = Dense(10, name='dense3', activation='softmax')
    model.add(layer3)

    # `metrics` must be passed as a keyword argument (the `=` was missing).
    model.compile(
        loss=categorical_crossentropy, optimizer='Adam', metrics=['accuracy']
    )

    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    model.summary()
    print("x_train.shape():", x_train.shape)
    print("y_train.shape()", y_train.shape)

    model.fit(x_train, y_train, epochs=5, verbose=2)
    print(model.evaluate(x_test, y_test))
    return model
def stopBackprop(x, freeze_mask=None):
    """Pass ``x`` through unchanged while blocking gradients at masked units.

    Args:
        x: Activation tensor.
        freeze_mask: Array broadcastable to ``x`` holding 1 where the
            gradient must be blocked and 0 elsewhere. When omitted, nothing
            is blocked and ``x`` is returned as is.

    Returns:
        A tensor with the same values as ``x``; gradients flow back only
        through positions where ``freeze_mask`` is 0.
    """
    # The parameter is deliberately not called `mask`: Keras' Lambda layer
    # injects its own `mask` keyword into functions that declare one, which
    # would overwrite the array supplied through `arguments`.
    if freeze_mask is None:
        return x
    stopped = stop_gradient(x)
    return x * (1 - freeze_mask) + stopped * freeze_mask


def run_certain_weights():
    """Train an MNIST classifier with gradients blocked at some ``dense1`` units.

    The network is Flatten -> Dense(300, frozen) -> Lambda(stopBackprop)
    -> Dense(300) -> Dense(10). The Lambda blocks back-propagation through
    units 220..227 of ``dense1``'s output. It affects only gradients flowing
    to layers *before* it; it does not freeze any weights of ``dense2``.

    Returns:
        The fitted ``Sequential`` model.
    """
    x_train, y_train, x_test, y_test = load_data()

    # Sequential expects a list of layers, not a bare layer instance.
    model = Sequential([Flatten(input_shape=(28, 28))])

    # One entry per dense1 unit; float32 to match the activations' dtype.
    mask = np.zeros((300,), dtype='float32')
    print(mask.shape)
    mask[220:228] = 1

    layer = Dense(300, name='dense1', activation='relu')
    layer.trainable = False
    model.add(layer)

    # Before the dense2 layer. The Lambda layer itself is added to the model
    # (it cannot be called on a Layer object), and the mask is handed to
    # stopBackprop through `arguments` instead of an undefined global.
    model.add(Lambda(stopBackprop, arguments={'freeze_mask': mask}))

    layer2 = Dense(300, name='dense2', activation='relu')
    layer2.trainable = True
    model.add(layer2)

    layer3 = Dense(10, name='dense3', activation='softmax')
    model.add(layer3)

    model.compile(
        loss=categorical_crossentropy, optimizer='Adam', metrics=['accuracy']
    )

    # summary() prints the table itself and returns None.
    model.summary()
    print("x_train.shape():", x_train.shape)
    print("y_train.shape()", y_train.shape)

    model.fit(x_train, y_train, epochs=5, verbose=2)
    print(model.evaluate(x_test, y_test))
    return model
def freeze(model):
    """Freeze ``dense2``, train one more epoch, and report whether it changed.

    Args:
        model: A compiled Keras model containing a layer named ``dense2``.

    Prints the training-set shapes and then a message stating whether the
    kernel of ``dense2`` is identical before and after the extra epoch.
    """
    x_train, y_train, _, _ = load_data()
    name = 'dense2'
    target = model.get_layer(name=name)

    # Record the kernel before retraining (get_weights returns [kernel, bias]).
    weights_before = target.get_weights()[0]

    # Freeze the weights of this layer. Keras fixes the set of trainable
    # weights at compile time, so the model has to be compiled again for the
    # change to take effect.
    target.trainable = False
    model.compile(
        loss=categorical_crossentropy, optimizer='Adam', metrics=['accuracy']
    )

    # Retrain.
    print("x_train.shape():", x_train.shape)
    print("y_train.shape()", y_train.shape)
    model.fit(x_train, y_train, verbose=2, epochs=1)

    weights_after = target.get_weights()[0]
    if np.array_equal(weights_before, weights_after):
        print('the weights did not change!!!')
    else:
        print('the weights changed!!!!')
if __name__ == '__main__':
    # Train each experiment in turn, then check whether `dense2` stays fixed
    # through one further epoch of training.
    for build_model in (run, run_certain_weights):
        model = build_model()
        freeze(model)
我是 Keras 的新手。我想实现一个并非所有权重都会更新的层。例如,在下面的代码中,我希望 dilation 层在训练时,某些中心权重永远不会被更新。具体来说,dilation 层中每个特征矩阵(共 1024 个)的形状为 448×448,而所有特征矩阵中心的 8×8 块永远不会更新,即这个 8×8 块是特征矩阵的(不可训练的)掩码。
# Keyword arguments shared by every convolution in this stack.
conv_kwargs = dict(padding="same", activation="relu", kernel_initializer="he_normal")

input_layer = Input(shape=(896, 896, 3))
new_layer = Conv2D(32, kernel_size=(3, 3), **conv_kwargs)(input_layer)
# 2x2 max-pooling with stride 2 halves the spatial size: 896 -> 448.
new_layer = MaxPooling2D(
    pool_size=(2, 2), strides=(2, 2), padding="same", data_format=None
)(new_layer)
new_layer = Conv2D(64, kernel_size=(3, 3), **conv_kwargs)(new_layer)
# Dilated 7x7 convolution that produces the 1024 feature maps.
new_layer = Conv2D(
    1024, kernel_size=(7, 7), dilation_rate=8, name="dialation", **conv_kwargs
)(new_layer)
# Two 1x1 convolutions bring the channel count down to 32.
new_layer = Conv2D(32, kernel_size=(1, 1), **conv_kwargs)(new_layer)
new_layer = Conv2D(32, kernel_size=(1, 1), **conv_kwargs)(new_layer)

model = Model(input_layer, new_layer)
我正在尝试使用 Keras 的自定义层(custom layer)[link],但我很难理解。有人能帮帮我吗?
更新: 为了更好地理解,我添加了下图。膨胀层包含 1024 个特征。我希望每个特征的中间区域不可训练(静态)。
两种情况都使用此掩码:
# Mask with singleton batch and channel axes so it broadcasts against
# feature maps of shape (batch, 448, 448, channels).
mask = np.zeros(shape=(1, 448, 448, 1))
# Flag the 8x8 window covering rows/columns 220..227 (the centre of the map).
mask[0, 220:228, 220:228, 0] = 1
替换部分特征
如果你用常量值替换部分特征,这部分特征就是静态的,但它仍然会参与反向传播(因为权重仍然会与图像的这一部分相乘并求和,连接依然存在)。
# Value written into the masked region. Zero cancels the kernel's
# contribution there, but not the bias.
constant = 0


def replace(x):
    """Overwrite the masked region of ``x`` with ``constant``.

    Reads the module-level ``mask`` (1 inside the region to replace, 0
    elsewhere), which must broadcast against ``x``.

    Args:
        x: Feature tensor entering the layer.

    Returns:
        ``x`` outside the mask and ``constant`` inside it.
    """
    return x * (1 - mask) + constant * mask


# Insert before the dilation layer.
new_layer = Lambda(replace)(new_layer)
保持特征值,但停止反向传播
这里膨胀层(dilation layer)及其之后各层的权重会正常更新,但膨胀层之前的权重不会受到中心区域的影响。
def stopBackprop(x):
    """Pass ``x`` through unchanged while blocking gradients in the masked region.

    Reads the module-level ``mask`` (1 where gradients must be blocked, 0
    elsewhere), which must broadcast against ``x``. ``K`` is presumably the
    Keras backend module (``from keras import backend as K``) -- confirm
    against the surrounding imports.

    Args:
        x: Feature tensor entering the layer.

    Returns:
        A tensor with the same values as ``x``; gradients flow back only
        through the unmasked positions.
    """
    # The backend function is `stop_gradient` (singular); `stop_gradients`
    # does not exist and raises AttributeError.
    stopped = K.stop_gradient(x)
    return x * (1 - mask) + stopped * mask


# Insert before the dilation layer.
new_layer = Lambda(stopBackprop)(new_layer)
我正在研究权重聚类:先对权重进行聚类,然后冻结特定的簇,再训练网络。
我正在尝试使用上述示例冻结此网络中的特定权重,但我不确定如何在 run_certain_weights() 中设置掩码和自定义层的形状。
这是我正在使用的代码:
from keras.layers import Dense, Flatten, Lambda
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.datasets import mnist
from keras.losses import categorical_crossentropy
from keras.backend import stop_gradient
import numpy as np
def load_data():
    """Load MNIST and prepare it for a softmax classifier.

    Images are converted to float32 and divided by 255; labels are one-hot
    encoded over 10 classes.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    # Cast first so the division is carried out in float32.
    train_images = train_images.astype('float32') / 255
    test_images = test_images.astype('float32') / 255

    train_targets = to_categorical(train_labels, num_classes=10)
    test_targets = to_categorical(test_labels, num_classes=10)

    return train_images, train_targets, test_images, test_targets
def run():
    """Train a three-layer dense MNIST classifier with ``dense2`` frozen.

    The network is Flatten -> Dense(300) -> Dense(100, frozen) -> Dense(10).
    It is trained for 5 epochs and evaluated on the test split; the summary,
    the training-set shapes and the evaluation result are printed.

    Returns:
        The fitted ``Sequential`` model.
    """
    x_train, y_train, x_test, y_test = load_data()

    # Sequential expects a list of layers, not a bare layer instance.
    model = Sequential([Flatten(input_shape=(28, 28))])

    layer = Dense(300, name='dense1', activation='relu')
    layer.trainable = True
    model.add(layer)

    layer2 = Dense(100, name='dense2', activation='relu')
    layer2.trainable = False
    model.add(layer2)

    layer3 = Dense(10, name='dense3', activation='softmax')
    model.add(layer3)

    # `metrics` must be passed as a keyword argument (the `=` was missing).
    model.compile(
        loss=categorical_crossentropy, optimizer='Adam', metrics=['accuracy']
    )

    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    model.summary()
    print("x_train.shape():", x_train.shape)
    print("y_train.shape()", y_train.shape)

    model.fit(x_train, y_train, epochs=5, verbose=2)
    print(model.evaluate(x_test, y_test))
    return model
def stopBackprop(x, freeze_mask=None):
    """Pass ``x`` through unchanged while blocking gradients at masked units.

    Args:
        x: Activation tensor.
        freeze_mask: Array broadcastable to ``x`` holding 1 where the
            gradient must be blocked and 0 elsewhere. When omitted, nothing
            is blocked and ``x`` is returned as is.

    Returns:
        A tensor with the same values as ``x``; gradients flow back only
        through positions where ``freeze_mask`` is 0.
    """
    # The parameter is deliberately not called `mask`: Keras' Lambda layer
    # injects its own `mask` keyword into functions that declare one, which
    # would overwrite the array supplied through `arguments`.
    if freeze_mask is None:
        return x
    stopped = stop_gradient(x)
    return x * (1 - freeze_mask) + stopped * freeze_mask


def run_certain_weights():
    """Train an MNIST classifier with gradients blocked at some ``dense1`` units.

    The network is Flatten -> Dense(300, frozen) -> Lambda(stopBackprop)
    -> Dense(300) -> Dense(10). The Lambda blocks back-propagation through
    units 220..227 of ``dense1``'s output. It affects only gradients flowing
    to layers *before* it; it does not freeze any weights of ``dense2``.

    Returns:
        The fitted ``Sequential`` model.
    """
    x_train, y_train, x_test, y_test = load_data()

    # Sequential expects a list of layers, not a bare layer instance.
    model = Sequential([Flatten(input_shape=(28, 28))])

    # One entry per dense1 unit; float32 to match the activations' dtype.
    mask = np.zeros((300,), dtype='float32')
    print(mask.shape)
    mask[220:228] = 1

    layer = Dense(300, name='dense1', activation='relu')
    layer.trainable = False
    model.add(layer)

    # Before the dense2 layer. The Lambda layer itself is added to the model
    # (it cannot be called on a Layer object), and the mask is handed to
    # stopBackprop through `arguments` instead of an undefined global.
    model.add(Lambda(stopBackprop, arguments={'freeze_mask': mask}))

    layer2 = Dense(300, name='dense2', activation='relu')
    layer2.trainable = True
    model.add(layer2)

    layer3 = Dense(10, name='dense3', activation='softmax')
    model.add(layer3)

    model.compile(
        loss=categorical_crossentropy, optimizer='Adam', metrics=['accuracy']
    )

    # summary() prints the table itself and returns None.
    model.summary()
    print("x_train.shape():", x_train.shape)
    print("y_train.shape()", y_train.shape)

    model.fit(x_train, y_train, epochs=5, verbose=2)
    print(model.evaluate(x_test, y_test))
    return model
def freeze(model):
    """Freeze ``dense2``, train one more epoch, and report whether it changed.

    Args:
        model: A compiled Keras model containing a layer named ``dense2``.

    Prints the training-set shapes and then a message stating whether the
    kernel of ``dense2`` is identical before and after the extra epoch.
    """
    x_train, y_train, _, _ = load_data()
    name = 'dense2'
    target = model.get_layer(name=name)

    # Record the kernel before retraining (get_weights returns [kernel, bias]).
    weights_before = target.get_weights()[0]

    # Freeze the weights of this layer. Keras fixes the set of trainable
    # weights at compile time, so the model has to be compiled again for the
    # change to take effect.
    target.trainable = False
    model.compile(
        loss=categorical_crossentropy, optimizer='Adam', metrics=['accuracy']
    )

    # Retrain.
    print("x_train.shape():", x_train.shape)
    print("y_train.shape()", y_train.shape)
    model.fit(x_train, y_train, verbose=2, epochs=1)

    weights_after = target.get_weights()[0]
    if np.array_equal(weights_before, weights_after):
        print('the weights did not change!!!')
    else:
        print('the weights changed!!!!')
if __name__ == '__main__':
    # Train each experiment in turn, then check whether `dense2` stays fixed
    # through one further epoch of training.
    for build_model in (run, run_certain_weights):
        model = build_model()
        freeze(model)