在keras中重用函数的权重
Reuse weights of a function in keras
我有一个函数叫做 block
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
def block(X, classes):
    """Apply a Conv2D -> ReLU -> Flatten -> Dense stack to ``X``.

    New layer objects are instantiated on every call, so two calls to this
    function do not share weights. The Conv2D and Dense layers are given the
    fixed names ``'a'`` and ``'fc'``.

    Args:
        X: Input tensor fed to the 1x1 convolution.
        classes: Number of units of the final linear Dense layer.

    Returns:
        The output tensor of the final Dense layer.
    """
    X = layers.Conv2D(
        filters=3,
        kernel_size=(1, 1),
        strides=(1, 1),
        name='a',
        kernel_initializer=glorot_uniform(),
    )(X)
    X = layers.Activation('relu')(X)
    X = layers.Flatten()(X)
    X = layers.Dense(
        classes,
        activation='linear',
        name='fc',
        kernel_initializer=glorot_uniform(),
    )(X)
    return X
block
是一个神经网络模块。我想为多个输入重用 block
的权重。例如,假设我有两个输入:input1
、input2
。我将如何通过 block
传递它们,以便权重再次用于 input2
。下面的代码不起作用,因为它为 B
初始化了一组新的权重。
# Two separate inputs with the same shape (64, 64, 3).
input1 = layers.Input((64, 64, 3))
input2 = layers.Input((64, 64, 3))
# Each call to block() instantiates new Conv2D and Dense layers, so A and B
# are produced by two different sets of weights instead of a shared one.
A = block(input1, 10)
B = block(input2, 10)
print(A)
print(B)
如果您使 block
成为继承自 tf.keras.Model
的 class,或者将其改为返回一个包含所有所需层的 Sequential
模型,这可能会有所帮助。目前,您似乎没有在任何地方保存权重。
例如:
# Template: build each branch as its own Sequential model.
# A and B are two separate model instances here.
A = tf.keras.Sequential([
    # ... your layers
])
B = tf.keras.Sequential([
    # ... your layers
])
# Or use:
def block(params):
    """Build and return a new ``tf.keras.Sequential`` model.

    Every call constructs a fresh ``Sequential`` instance.

    Args:
        params: Settings used to configure the layers (template placeholder).

    Returns:
        The newly created ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # ... your layers with params
    ])
    return model


# Two calls give two separate model objects built from the same params.
A = block(params)
B = block(params)
# Or use
class block(tf.keras.Model):
    """Template for writing the block as a ``tf.keras.Model`` subclass.

    Layers are created once in ``__init__`` and stored on the instance;
    ``call`` applies them to its input.
    """

    def __init__(self, params):
        """Create the layers of the block.

        Args:
            params: Settings used to configure the layers (template placeholder).
        """
        super().__init__()
        # ... first layer. Illustrative stand-in (the 1x1 convolution from the
        # question's block); replace it with your own first layer.
        self.layer1 = tf.keras.layers.Conv2D(
            filters=3, kernel_size=(1, 1), strides=(1, 1)
        )
        # ... your layers

    def call(self, inputs):
        """Run ``inputs`` through the layers created in ``__init__``."""
        X = self.layer1(inputs)
        # ... the rest of your layers
        return X
然后您应该能够像这样获得图层权重 A.layers[layer_number].get_weights()
并使用 B.layers[layer_number].set_weights([np_weight_arr])
设置权重。
此外,我知道一些单独的图层接受 weights
参数,但我必须仔细检查它是否适用于您提到的所有图层。我知道你可以做类似 layers.Embedding(vocab_size+1, emb_dim, weights=[embedding_matrix], input_length=maxlength)
的事情,其中 embedding_matrix
是一个 numpy 数组。
最后,如果你可以重复使用 A,那将是最简单的解决方案。
重用的一种选择是使用共享模型。我们可以从 block()
中创建一个单独的模型,我们可以随时重复使用它。
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.initializers import glorot_uniform
# this is my block
def block(X, classes):
    """Apply a Conv2D -> ReLU -> Flatten -> Dense stack to ``X``.

    New layer objects are instantiated on every call; no layer names are
    set explicitly.

    Args:
        X: Input tensor fed to the 1x1 convolution.
        classes: Number of units of the final linear Dense layer.

    Returns:
        The output tensor of the final Dense layer.
    """
    X = layers.Conv2D(
        filters=3,
        kernel_size=(1, 1),
        strides=(1, 1),
        kernel_initializer=glorot_uniform(),
    )(X)
    X = layers.Activation('relu')(X)
    X = layers.Flatten()(X)
    X = layers.Dense(
        classes,
        activation='linear',
        kernel_initializer=glorot_uniform(),
    )(X)
    return X
# block model
# Wrap block() in a standalone Model: block() is called once here, so its
# layers are instantiated a single time.
ip_base = layers.Input((64, 64, 3)) # input of the block model; decide on its shape
op = block(ip_base, 3)
block_model = models.Model(ip_base, op)
block_model_weights = block_model.get_weights() # we can save this as pickle and later load for another model
print(block_model_weights)
# now we can save/re-use the weight of this model anytime we want
# secondary
# Both inputs go through the same block_model object, so A and B are
# computed by the same model instance rather than by two separate ones.
input1 = layers.Input((64, 64, 3))
input2 = layers.Input((64, 64, 3))
A = block_model(input1)
B = block_model(input2)
# Combined two-input / two-output model built around the shared block_model.
model = models.Model([input1, input2], [A, B])
# Plot the combined model.
tf.keras.utils.plot_model(
model)
输出:
[array([[[[ 0.07414746, -0.41065478, -0.5521915 ],
[-0.6518545 , 0.09209108, 0.9978211 ],
[ 0.8638036 , 0.5552795 , 0.2620473 ]]]], dtype=float32), array([0., 0., 0.], dtype=float32), array([[-0.02172941, 0.00599053, 0.00549869],
[-0.01580501, -0.01591844, 0.0140271 ],
[ 0.0102962 , 0.01141792, -0.00722483],
...,
[ 0.00119783, 0.0168376 , -0.00014478],
[ 0.00626496, -0.0100358 , 0.01791514],
[-0.00650628, -0.00537593, 0.00106668]], dtype=float32), array([0., 0., 0.], dtype=float32)]
如您所见,如果我们从基础创建一个 Model
对象,然后通过它传递任何输入,权重将被共享。
如果我们想要加载预训练的 block
模型,然后使用其先前的权重并传递我们的输入之一,我们可以简单地执行此操作。
# save the weights of the block model
import numpy as np  # np is used below but is not among this snippet's imports

# get_weights() returns one array per variable and their shapes differ
# (4-D conv kernel, 1-D biases, 2-D dense kernel). np.array() on such a
# ragged list raises ValueError on NumPy >= 1.24, so fill a 1-D object
# array element by element instead.
layer_weights = block_model.get_weights()
weights = np.empty(len(layer_weights), dtype=object)
for index, layer_weight in enumerate(layer_weights):
    weights[index] = layer_weight
np.save('block_weights.npy', weights, allow_pickle=True)

# now we create another new block model
# block model
ip_base = layers.Input((64, 64, 3))  # decide on it
op = block(ip_base, 3)
new_block_model = models.Model(ip_base, op)

# currently it's randomly initialized but we can take the previous weights and load that in the current model
# load weight
# NOTE: allow_pickle=True unpickles the file's contents; only load files you trust.
old_weights = np.load('block_weights.npy', allow_pickle=True)
# set_weights() takes a list of arrays, so unpack the object array first.
new_block_model.set_weights(list(old_weights))

# compare all of the weights
print(weights)
print('---------------')
print(old_weights)
print('---------------')
print(new_block_model.get_weights())
输出:
[array([[[[ 0.38135457, -0.28602505, 0.6248524 ],
[-0.10373783, 0.20868587, -0.0295043 ],
[ 0.073596 , -0.85106397, 0.86780167]]]], dtype=float32)
array([0., 0., 0.], dtype=float32)
array([[-0.00512073, 0.00298601, 0.015619 ],
[ 0.01500274, 0.01736909, -0.0106204 ],
[ 0.00690563, 0.00548493, 0.00449893],
...,
[-0.00202469, -0.00226198, 0.0212799 ],
[-0.01203138, -0.00065516, -0.01211848],
[-0.01238732, -0.00368575, 0.02146613]], dtype=float32)
array([0., 0., 0.], dtype=float32)]
---------------
[array([[[[ 0.38135457, -0.28602505, 0.6248524 ],
[-0.10373783, 0.20868587, -0.0295043 ],
[ 0.073596 , -0.85106397, 0.86780167]]]], dtype=float32)
array([0., 0., 0.], dtype=float32)
array([[-0.00512073, 0.00298601, 0.015619 ],
[ 0.01500274, 0.01736909, -0.0106204 ],
[ 0.00690563, 0.00548493, 0.00449893],
...,
[-0.00202469, -0.00226198, 0.0212799 ],
[-0.01203138, -0.00065516, -0.01211848],
[-0.01238732, -0.00368575, 0.02146613]], dtype=float32)
array([0., 0., 0.], dtype=float32)]
---------------
[array([[[[ 0.38135457, -0.28602505, 0.6248524 ],
[-0.10373783, 0.20868587, -0.0295043 ],
[ 0.073596 , -0.85106397, 0.86780167]]]], dtype=float32), array([0., 0., 0.], dtype=float32), array([[-0.00512073, 0.00298601, 0.015619 ],
[ 0.01500274, 0.01736909, -0.0106204 ],
[ 0.00690563, 0.00548493, 0.00449893],
...,
[-0.00202469, -0.00226198, 0.0212799 ],
[-0.01203138, -0.00065516, -0.01211848],
[-0.01238732, -0.00368575, 0.02146613]], dtype=float32), array([0., 0., 0.], dtype=float32)]
我有一个函数叫做 block
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
def block(X, classes):
    """Apply a Conv2D -> ReLU -> Flatten -> Dense stack to ``X``.

    New layer objects are instantiated on every call, so two calls to this
    function do not share weights. The Conv2D and Dense layers are given the
    fixed names ``'a'`` and ``'fc'``.

    Args:
        X: Input tensor fed to the 1x1 convolution.
        classes: Number of units of the final linear Dense layer.

    Returns:
        The output tensor of the final Dense layer.
    """
    X = layers.Conv2D(
        filters=3,
        kernel_size=(1, 1),
        strides=(1, 1),
        name='a',
        kernel_initializer=glorot_uniform(),
    )(X)
    X = layers.Activation('relu')(X)
    X = layers.Flatten()(X)
    X = layers.Dense(
        classes,
        activation='linear',
        name='fc',
        kernel_initializer=glorot_uniform(),
    )(X)
    return X
block
是一个神经网络模块。我想为多个输入重用 block
的权重。例如,假设我有两个输入:input1
、input2
。我将如何通过 block
传递它们,以便权重再次用于 input2
。下面的代码不起作用,因为它为 B
初始化了一组新的权重。
# Two separate inputs with the same shape (64, 64, 3).
input1 = layers.Input((64, 64, 3))
input2 = layers.Input((64, 64, 3))
# Each call to block() instantiates new Conv2D and Dense layers, so A and B
# are produced by two different sets of weights instead of a shared one.
A = block(input1, 10)
B = block(input2, 10)
print(A)
print(B)
如果您使 block
成为继承自 tf.keras.Model
的 class,或者将其改为返回一个包含所有所需层的 Sequential
模型,这可能会有所帮助。目前,您似乎没有在任何地方保存权重。
例如:
# Template: build each branch as its own Sequential model.
# A and B are two separate model instances here.
A = tf.keras.Sequential([
    # ... your layers
])
B = tf.keras.Sequential([
    # ... your layers
])
# Or use:
def block(params):
    """Build and return a new ``tf.keras.Sequential`` model.

    Every call constructs a fresh ``Sequential`` instance.

    Args:
        params: Settings used to configure the layers (template placeholder).

    Returns:
        The newly created ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # ... your layers with params
    ])
    return model


# Two calls give two separate model objects built from the same params.
A = block(params)
B = block(params)
# Or use
class block(tf.keras.Model):
    """Template for writing the block as a ``tf.keras.Model`` subclass.

    Layers are created once in ``__init__`` and stored on the instance;
    ``call`` applies them to its input.
    """

    def __init__(self, params):
        """Create the layers of the block.

        Args:
            params: Settings used to configure the layers (template placeholder).
        """
        super().__init__()
        # ... first layer. Illustrative stand-in (the 1x1 convolution from the
        # question's block); replace it with your own first layer.
        self.layer1 = tf.keras.layers.Conv2D(
            filters=3, kernel_size=(1, 1), strides=(1, 1)
        )
        # ... your layers

    def call(self, inputs):
        """Run ``inputs`` through the layers created in ``__init__``."""
        X = self.layer1(inputs)
        # ... the rest of your layers
        return X
然后您应该能够像这样获得图层权重 A.layers[layer_number].get_weights()
并使用 B.layers[layer_number].set_weights([np_weight_arr])
设置权重。
此外,我知道一些单独的图层接受 weights
参数,但我必须仔细检查它是否适用于您提到的所有图层。我知道你可以做类似 layers.Embedding(vocab_size+1, emb_dim, weights=[embedding_matrix], input_length=maxlength)
的事情,其中 embedding_matrix
是一个 numpy 数组。
最后,如果你可以重复使用 A,那将是最简单的解决方案。
重用的一种选择是使用共享模型。我们可以从 block()
中创建一个单独的模型,我们可以随时重复使用它。
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.initializers import glorot_uniform
# this is my block
def block(X, classes):
    """Apply a Conv2D -> ReLU -> Flatten -> Dense stack to ``X``.

    New layer objects are instantiated on every call; no layer names are
    set explicitly.

    Args:
        X: Input tensor fed to the 1x1 convolution.
        classes: Number of units of the final linear Dense layer.

    Returns:
        The output tensor of the final Dense layer.
    """
    X = layers.Conv2D(
        filters=3,
        kernel_size=(1, 1),
        strides=(1, 1),
        kernel_initializer=glorot_uniform(),
    )(X)
    X = layers.Activation('relu')(X)
    X = layers.Flatten()(X)
    X = layers.Dense(
        classes,
        activation='linear',
        kernel_initializer=glorot_uniform(),
    )(X)
    return X
# block model
# Wrap block() in a standalone Model: block() is called once here, so its
# layers are instantiated a single time.
ip_base = layers.Input((64, 64, 3)) # input of the block model; decide on its shape
op = block(ip_base, 3)
block_model = models.Model(ip_base, op)
block_model_weights = block_model.get_weights() # we can save this as pickle and later load for another model
print(block_model_weights)
# now we can save/re-use the weight of this model anytime we want
# secondary
# Both inputs go through the same block_model object, so A and B are
# computed by the same model instance rather than by two separate ones.
input1 = layers.Input((64, 64, 3))
input2 = layers.Input((64, 64, 3))
A = block_model(input1)
B = block_model(input2)
# Combined two-input / two-output model built around the shared block_model.
model = models.Model([input1, input2], [A, B])
# Plot the combined model.
tf.keras.utils.plot_model(
model)
输出:
[array([[[[ 0.07414746, -0.41065478, -0.5521915 ],
[-0.6518545 , 0.09209108, 0.9978211 ],
[ 0.8638036 , 0.5552795 , 0.2620473 ]]]], dtype=float32), array([0., 0., 0.], dtype=float32), array([[-0.02172941, 0.00599053, 0.00549869],
[-0.01580501, -0.01591844, 0.0140271 ],
[ 0.0102962 , 0.01141792, -0.00722483],
...,
[ 0.00119783, 0.0168376 , -0.00014478],
[ 0.00626496, -0.0100358 , 0.01791514],
[-0.00650628, -0.00537593, 0.00106668]], dtype=float32), array([0., 0., 0.], dtype=float32)]
如您所见,如果我们从基础创建一个 Model
对象,然后通过它传递任何输入,权重将被共享。
如果我们想要加载预训练的 block
模型,然后使用其先前的权重并传递我们的输入之一,我们可以简单地执行此操作。
# save the weights of the block model
import numpy as np  # np is used below but is not among this snippet's imports

# get_weights() returns one array per variable and their shapes differ
# (4-D conv kernel, 1-D biases, 2-D dense kernel). np.array() on such a
# ragged list raises ValueError on NumPy >= 1.24, so fill a 1-D object
# array element by element instead.
layer_weights = block_model.get_weights()
weights = np.empty(len(layer_weights), dtype=object)
for index, layer_weight in enumerate(layer_weights):
    weights[index] = layer_weight
np.save('block_weights.npy', weights, allow_pickle=True)

# now we create another new block model
# block model
ip_base = layers.Input((64, 64, 3))  # decide on it
op = block(ip_base, 3)
new_block_model = models.Model(ip_base, op)

# currently it's randomly initialized but we can take the previous weights and load that in the current model
# load weight
# NOTE: allow_pickle=True unpickles the file's contents; only load files you trust.
old_weights = np.load('block_weights.npy', allow_pickle=True)
# set_weights() takes a list of arrays, so unpack the object array first.
new_block_model.set_weights(list(old_weights))

# compare all of the weights
print(weights)
print('---------------')
print(old_weights)
print('---------------')
print(new_block_model.get_weights())
输出:
[array([[[[ 0.38135457, -0.28602505, 0.6248524 ],
[-0.10373783, 0.20868587, -0.0295043 ],
[ 0.073596 , -0.85106397, 0.86780167]]]], dtype=float32)
array([0., 0., 0.], dtype=float32)
array([[-0.00512073, 0.00298601, 0.015619 ],
[ 0.01500274, 0.01736909, -0.0106204 ],
[ 0.00690563, 0.00548493, 0.00449893],
...,
[-0.00202469, -0.00226198, 0.0212799 ],
[-0.01203138, -0.00065516, -0.01211848],
[-0.01238732, -0.00368575, 0.02146613]], dtype=float32)
array([0., 0., 0.], dtype=float32)]
---------------
[array([[[[ 0.38135457, -0.28602505, 0.6248524 ],
[-0.10373783, 0.20868587, -0.0295043 ],
[ 0.073596 , -0.85106397, 0.86780167]]]], dtype=float32)
array([0., 0., 0.], dtype=float32)
array([[-0.00512073, 0.00298601, 0.015619 ],
[ 0.01500274, 0.01736909, -0.0106204 ],
[ 0.00690563, 0.00548493, 0.00449893],
...,
[-0.00202469, -0.00226198, 0.0212799 ],
[-0.01203138, -0.00065516, -0.01211848],
[-0.01238732, -0.00368575, 0.02146613]], dtype=float32)
array([0., 0., 0.], dtype=float32)]
---------------
[array([[[[ 0.38135457, -0.28602505, 0.6248524 ],
[-0.10373783, 0.20868587, -0.0295043 ],
[ 0.073596 , -0.85106397, 0.86780167]]]], dtype=float32), array([0., 0., 0.], dtype=float32), array([[-0.00512073, 0.00298601, 0.015619 ],
[ 0.01500274, 0.01736909, -0.0106204 ],
[ 0.00690563, 0.00548493, 0.00449893],
...,
[-0.00202469, -0.00226198, 0.0212799 ],
[-0.01203138, -0.00065516, -0.01211848],
[-0.01238732, -0.00368575, 0.02146613]], dtype=float32), array([0., 0., 0.], dtype=float32)]