Tensorflow 模型中的形状不匹配

Shape mismatch in TensorFlow model

我编写了如下的模型构建函数:

from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf

def get_model(num_features, output_size, output_bias=None):
    """Build, compile and return the LSTM classifier used in the question.

    Args:
        num_features: Size of the last axis of the ragged input.
        output_size: Number of units in the final sigmoid ``Dense`` layer.
        output_bias: Value wrapped in ``tf.keras.initializers.Constant`` and
            used as the bias initializer of the final layer.

    Returns:
        The compiled Keras ``Model``. ``model.summary()`` is printed before
        returning.
    """
    # NOTE(review): with the default ``output_bias=None`` this hands ``None``
    # to ``Constant`` -- confirm callers always supply a numeric value.
    output_bias = tf.keras.initializers.Constant(output_bias)

    opt = Adam(learning_rate=0.0008)

    # Ragged input: variable-length sequences of ``num_features``-wide steps.
    inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)
    # The ragged batch is converted to a dense tensor and a boolean mask built
    # from the per-row lengths is passed to the LSTM alongside it.
    layers = LSTM(32, activation='tanh')(
        inputs.to_tensor(), mask=tf.sequence_mask(inputs.row_lengths()))

    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(output_size, activation='sigmoid',
                         bias_initializer=output_bias)(layers)
    model = Model(inputs, layers)
    # NOTE(review): the F1 metric is created with ``num_classes=2`` while the
    # summary shows a final layer of shape (None, 1); the answer further down
    # identifies this metric configuration as the cause of the reported
    # "[2] and [1]" shape error.
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt, metrics=[tfa.metrics.F1Score(num_classes=2)])
    model.summary()
    return model

这是模型摘要:

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_6 (InputLayer)            [(None, None, 11)]   0                                            
__________________________________________________________________________________________________
input.row_lengths_5 (InstanceMe (None,)              0           input_6[0][0]                    
__________________________________________________________________________________________________
input.to_tensor_5 (InstanceMeth (None, None, 11)     0           input_6[0][0]                    
__________________________________________________________________________________________________
tf.sequence_mask_5 (TFOpLambda) (None, None)         0           input.row_lengths_5[0][0]        
__________________________________________________________________________________________________
lstm_5 (LSTM)                   (None, 32)           5632        input.to_tensor_5[0][0]          
                                                                 tf.sequence_mask_5[0][0]         
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 32)           128         lstm_5[0][0]                     
__________________________________________________________________________________________________
dropout_15 (Dropout)            (None, 32)           0           batch_normalization_15[0][0]     
__________________________________________________________________________________________________
dense_15 (Dense)                (None, 32)           1056        dropout_15[0][0]                 
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 32)           128         dense_15[0][0]                   
__________________________________________________________________________________________________
dropout_16 (Dropout)            (None, 32)           0           batch_normalization_16[0][0]     
__________________________________________________________________________________________________
dense_16 (Dense)                (None, 32)           1056        dropout_16[0][0]                 
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 32)           128         dense_16[0][0]                   
__________________________________________________________________________________________________
dropout_17 (Dropout)            (None, 32)           0           batch_normalization_17[0][0]     
__________________________________________________________________________________________________
dense_17 (Dense)                (None, 1)            33          dropout_17[0][0]                 
==================================================================================================
Total params: 8,161
Trainable params: 7,969
Non-trainable params: 192
__________________________________________________________________________________________________

这是我的数据的形状:

# Report the (features, labels) shapes of the training and validation splits.
for features, labels in ((train_x, train_y), (val_x, val_y)):
    print(features.shape, labels.shape)

(52499, None, 11) (52499,)
(17500, None, 11) (17500,)

尝试拟合我的模型时,出现以下错误:

# Train for 300 epochs in batches of 500, validating on the held-out split.
model.fit(
    x=train_x,
    y=train_y,
    epochs=300,
    batch_size=500,
    validation_data=(val_x, val_y),
)

ValueError: Dimension 0 in both shapes must be equal, but are 2 and 1. Shapes are [2] and [1].

我不明白这些形状有什么问题。

您的模型本身没有问题。问题在于您遇到了 tfa.metrics.F1Score 的一个尚未解决(open)的 issue。对于二分类的情况,您必须将 F1Score 的参数改为 tfa.metrics.F1Score(num_classes=1, threshold=0.5)。下面是一个完整的可运行示例:

from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf

def get_model(num_features, output_size, output_bias=0.001):
    """Build and compile an LSTM classifier over ragged input sequences.

    Args:
        num_features: Size of the last (feature) axis of every timestep.
        output_size: Number of sigmoid units in the final ``Dense`` layer.
            The same value is used as ``num_classes`` for the F1 metric so
            the metric's per-class state always matches the output width.
        output_bias: Constant used to initialise the final layer's bias.
            ``None`` falls back to Keras' default zero initialiser instead of
            building ``Constant(None)``.

    Returns:
        The compiled Keras ``Model``. ``model.summary()`` is printed before
        returning.
    """
    if output_bias is None:
        bias_initializer = 'zeros'
    else:
        bias_initializer = tf.keras.initializers.Constant(output_bias)

    opt = Adam(learning_rate=0.0008)

    inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)
    # Densify the ragged batch and hand the LSTM an explicit mask derived
    # from the true row lengths, so padded timesteps are masked out.
    layers = LSTM(32, activation='tanh')(
        inputs.to_tensor(), mask=tf.sequence_mask(inputs.row_lengths()))

    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(output_size, activation='sigmoid',
                   bias_initializer=bias_initializer)(layers)
    model = Model(inputs, layers)

    # F1Score keeps one accumulator per class, so ``num_classes`` must equal
    # the width of the model output; a mismatch produces the
    # "Shapes are [2] and [1]" error. For the binary case (output_size == 1)
    # this is exactly F1Score(num_classes=1, threshold=0.5). The threshold
    # turns the sigmoid probabilities into 0/1 predictions.
    f1_metric = tfa.metrics.F1Score(num_classes=output_size, threshold=0.5)
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                  optimizer=opt,
                  metrics=[f1_metric])
    model.summary()
    return model


# Binary classifier: 11 features per timestep, one sigmoid output unit.
model = get_model(num_features=11, output_size=1)

# Toy ragged batch: five timesteps split into two sequences (lengths 2 and 3).
rt = tf.RaggedTensor.from_row_splits(
    values=tf.ones(shape=[5, 11], dtype=tf.int32),
    row_splits=[0, 2, 5],
)

# One random float target in [0, 2) per sequence, shaped (batch, 1).
labels = tf.random.uniform(shape=(2, 1), maxval=2)
model.fit(x=rt, y=labels, epochs=300, batch_size=2, verbose=2)

或者,您也可以自行定义一个 F1Score 函数,并将其设置为模型的评估指标。有关详细信息,请参阅这篇帖子。