This Keras model works when created, but fails when loaded. Tensor splitting suspected
I am experimenting with LSTMs; specifically, feeding a sequence into one LSTM, transferring its state to a second LSTM, and decoding the sequence there. Between the two LSTMs I added an autoencoder that encodes the transferred state through a lower-dimensional latent space and decodes it back.
This works fine when I create the model and fit it. However, if I save the model and then try to continue training it, or even just use it without additional training, the model does not run and I get the following error:
Traceback (most recent call last):
  File "s2s_AE_2.py", line 140, in <module>
    model.fit_generator(train_generator(),callbacks=[checkpointer], steps_per_epoch=30, epochs=2000, verbose=1,validation_data=val_generator(),validation_steps=30)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 2224, in fit_generator
    class_weight=class_weight)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1877, in train_on_batch
    class_weight=class_weight)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1476, in _standardize_user_data
    exception_prefix='input')
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 86, in _standardize_input_data
    str(len(data)) + ' arrays: ' + str(data)[:200] + '...')
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 2 arrays: [array([[[ 0.47338937, 0.75865918, 0.37731877, 0.63840222,
          0.14653083],
        [ 0.52119932, 0.78308798, 0.45885839, 0.66738276,
          0.20393343],
        [ 0.5674261 , 0.806364...
My code is as follows:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, TimeDistributed, Lambda, Dropout, Activation, RepeatVector
from keras.callbacks import ModelCheckpoint
import numpy as np
from keras.layers import Lambda, Concatenate
from keras import backend as K
from keras.models import load_model
import os
seq_length=150
features_num=5
LSTM_latent_dim=40
AE_latent_dim=10
encoder_inputs = Input(shape=(seq_length, features_num))
encoder = LSTM(LSTM_latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
merged_encoder_states = Concatenate(axis=-1)([state_h, state_c])
encoded_states=Dense(AE_latent_dim,activation='relu')(merged_encoder_states)
decoded_states=Dense(LSTM_latent_dim*2, activation='relu')(encoded_states)
decoder_inputs=Input(shape=(1, features_num))
decoder_lstm = LSTM(LSTM_latent_dim, return_sequences=True, return_state=True)
decoder_dense = Dense(features_num)
all_outputs = []
inputs = decoder_inputs
states=[decoded_states[:,:LSTM_latent_dim],decoded_states[:,LSTM_latent_dim:]]
for _ in range(seq_length):
    # Run the decoder on one timestep
    outputs, state_h, state_c = decoder_lstm(inputs, initial_state=states)
    outputs = decoder_dense(outputs)
    # Store the current prediction (we will concatenate all predictions later)
    all_outputs.append(outputs)
    # Reinject the outputs as inputs for the next loop iteration
    # as well as update the states
    inputs = outputs
    states = [state_h, state_c]
# Concatenate all predictions
decoder_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
#model = load_model('pre_model.h5')
filepath_for_w= 'AE2_p2p_s2s_model.h5'
try:
    model = load_model(filepath_for_w)  # if model was previously run, continue from it
    print("loaded model")
except:
    print("new model")
print(model.summary())
model.compile(loss='mean_squared_error', optimizer='adam')
def create_wavelength(min_wavelength, max_wavelength, fluxes_in_wavelength, category):
    # category :: 0 - train ; 2 - validate ; 4 - test. 1;3;5 - dead space
    c = (category + np.random.random()) / 6
    k = fluxes_in_wavelength
    base = (np.trunc(k * np.random.random() * (max_wavelength - min_wavelength)) + k * min_wavelength) / k
    answer = base + c / k
    return answer
def make_line(length, category):
    shift = np.random.random()
    wavelength = create_wavelength(30, 10, 1, category)
    a = np.arange(length)
    answer = np.sin(a / wavelength + shift)
    return answer
def make_data(seq_num, seq_len, dim, category):
    data = np.array([]).reshape(0, seq_len, dim)
    for i in range(seq_num):
        mini_data = np.array([]).reshape(0, seq_len)
        for j in range(dim):
            line = make_line(seq_len, category)
            line = line.reshape(1, seq_len)
            mini_data = np.append(mini_data, line, axis=0)
        mini_data = np.swapaxes(mini_data, 1, 0)
        mini_data = mini_data.reshape(1, seq_len, dim)
        data = np.append(data, mini_data, axis=0)
    return data
def train_generator():
    while True:
        sequence_length = seq_length + 1
        data = make_data(1000, sequence_length, features_num, 0)  # category=0 in train
        encoder_input_data = data[:, 1:, :]   # all time points except the first
        decoder_input_data = data[:, 0, :]    # the first value in the sequence
        decoder_input_data = decoder_input_data.reshape((decoder_input_data.shape[0], 1, decoder_input_data.shape[1]))
        decoder_target_data = encoder_input_data
        yield [encoder_input_data, decoder_input_data], decoder_target_data
def val_generator():
    while True:
        sequence_length = seq_length + 1
        data = make_data(1000, sequence_length, features_num, 2)  # category=2 in val
        encoder_input_data = data[:, 1:, :]   # all time points except the first
        decoder_input_data = data[:, 0, :]    # the first value in the sequence
        decoder_input_data = decoder_input_data.reshape((decoder_input_data.shape[0], 1, decoder_input_data.shape[1]))
        decoder_target_data = encoder_input_data
        yield [encoder_input_data, decoder_input_data], decoder_target_data
checkpointer=ModelCheckpoint(filepath_for_w, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
model.fit_generator(train_generator(),callbacks=[checkpointer], steps_per_epoch=30, epochs=2000, verbose=1,validation_data=val_generator(),validation_steps=30)
model.save(filepath_for_w)
def predict_wave(input_wave, input_for_decoder):
    # input_wave = x[n,:,:], i.e. the points except the last seq_length; each wave has features_num features.
    # Run this function for all such instances (= n).
    #print(input_wave.shape)
    #print(input_for_decoder.shape)
    pred = model.predict([input_wave, input_for_decoder])
    return pred
def predict_many_waves_from_input(x):
    x, x2 = x  # x == encoder_input_data ; x2 == decoder_input_data
    instance_num = x.shape[0]
    multi_predict_collection = np.zeros((x.shape[0], seq_length, x.shape[2]))
    for n in range(instance_num):
        input_wave = x[n, :, :].reshape(1, x.shape[1], x.shape[2])
        input_for_decoder = x2[n, :, :].reshape(1, x2.shape[1], x2.shape[2])
        wave_prediction = predict_wave(input_wave, input_for_decoder)
        multi_predict_collection[n, :, :] = wave_prediction
    return multi_predict_collection
def test_maker():
    sequence_length = seq_length + 1
    data = make_data(470, sequence_length, features_num, 4)  # category=4 in test
    encoder_input_data = data[:, 1:, :]   # all time points except the first
    decoder_input_data = data[:, 0, :]    # the first value in the sequence
    decoder_input_data = decoder_input_data.reshape((decoder_input_data.shape[0], 1, decoder_input_data.shape[1]))
    decoder_target_data = encoder_input_data
    return [encoder_input_data, decoder_input_data], decoder_target_data
x, y = test_maker()
a = predict_many_waves_from_input(x)
x = x[0]  # keep the wave (generated data except the last seq_length time points)
print(x.shape)
print(y.shape)
print(a.shape)
np.save('a.npy', a)
np.save('y.npy', y)
np.save('x.npy', x)
print(np.mean(np.absolute(y[:,:,0] - a[:,:,0])))
print(np.mean(np.absolute(y[:,:,1] - a[:,:,1])))
print(np.mean(np.absolute(y[:,:,2] - a[:,:,2])))
print(np.mean(np.absolute(y[:,:,3] - a[:,:,3])))
print(np.mean(np.absolute(y[:,:,4] - a[:,:,4])))
The likely culprit is this line:
states=[decoded_states[:,:LSTM_latent_dim],decoded_states[:,LSTM_latent_dim:]]
After combining the states of the encoder LSTM and passing them through the autoencoder, I split them back into c and h (the cell state and hidden state, respectively) and feed them into the decoder LSTM.
It seems plausible to me that this step happens correctly in the freshly built model, but is somehow saved to the model file incorrectly (or loaded from it incorrectly), leaving the loaded model defective; note that the error above says the loaded model expects one input array rather than two.
What seems to me to further support this assessment is the fact that when this line is replaced with
states= [state_h, state_c]
the loaded model runs correctly (both fitting and predicting), but of course that removes the state autoencoder, so it is of no use to me other than for narrowing down the bug.
So I would like your help with two questions:
Why does this problem occur?
How can it be fixed?
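One variant that might be worth trying, on the assumption that the trouble is the raw backend slicing in that line not being tracked as a Keras layer, is to route the split through Lambda layers. This is only a sketch; I have not verified that it fixes loading:
# Hypothetical variant: wrap the state split in Lambda layers so that each half
# is the output of a proper Keras layer rather than a raw tensor slice.
split_h = Lambda(lambda t: t[:, :LSTM_latent_dim])(decoded_states)  # hidden state h
split_c = Lambda(lambda t: t[:, LSTM_latent_dim:])(decoded_states)  # cell state c
states = [split_h, split_c]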
A possible partial solution is to give up saving the whole model and instead save (and load) only the model's weights.
Replacing the lines
model = load_model(filepath_for_w)
...
checkpointer=ModelCheckpoint(filepath_for_w, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
...
model.save(filepath_for_w)
with
model.load_weights(filepath_for_w)
...
checkpointer=ModelCheckpoint(filepath_for_w, save_weights_only=True, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
...
model.save_weights(filepath_for_w)
works: the model can be loaded again for further fitting and prediction.
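Put together, the weights-only flow looks like this (the architecture is still built in code exactly as above; load_weights raising on a missing file stands in for the earlier try/except around load_model):
try:
    model.load_weights(filepath_for_w)  # resume from saved weights if a file exists
    print("loaded weights")
except (IOError, OSError):
    print("new model")
model.compile(loss='mean_squared_error', optimizer='adam')

checkpointer = ModelCheckpoint(filepath_for_w, save_weights_only=True, monitor='val_loss',
                               verbose=0, save_best_only=True, mode='auto', period=1)
model.fit_generator(train_generator(), callbacks=[checkpointer], steps_per_epoch=30,
                    epochs=2000, verbose=1, validation_data=val_generator(), validation_steps=30)
model.save_weights(filepath_for_w)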
However, this does not save the whole model; I still have to keep the architecture in the code in order to populate it with the weights. Nor does it explain why the problem occurs in the first place.
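A possible middle ground, assuming the architecture config itself serializes correctly, would be to store the architecture as JSON alongside the weights, so the script no longer has to rebuild the model in code. I have not tested this, and if the config serialization is what is broken, model_from_json would presumably fail the same way load_model does:
from keras.models import model_from_json

# Hypothetical: save the architecture and the weights separately.
with open('AE2_p2p_s2s_model.json', 'w') as f:
    f.write(model.to_json())
model.save_weights(filepath_for_w)

# Later: rebuild the model from the JSON config and reload the weights.
with open('AE2_p2p_s2s_model.json') as f:
    model = model_from_json(f.read())
model.load_weights(filepath_for_w)
model.compile(loss='mean_squared_error', optimizer='adam')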