Trying to use Dice Loss with UNET

I am trying to implement the U-Net example from the Keras website:

Image segmentation with a U-Net-like architecture

with only one change: using dice loss instead of "sparse_categorical_crossentropy". However, every time I try it, I get a different error. I am coding on Google Colab with TensorFlow 2.7.

For example, I tried using

from tensorflow.keras import backend as K

def DiceLoss(targets, inputs, smooth=1e-6):
    # flatten label and prediction tensors
    inputs = K.flatten(inputs)
    targets = K.flatten(targets)

    intersection = K.sum(K.dot(targets, inputs))
    dice = (2 * intersection + smooth) / (K.sum(targets) + K.sum(inputs) + smooth)
    return 1 - dice

and I got this error:

ValueError: Shape must be rank 2 but is rank 1 for '{{node DiceLoss99/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](DiceLoss99/Reshape_1, DiceLoss99/Reshape)' with input shapes: [?], [?].

The problem is with this line:

intersection = K.sum(K.dot(targets, inputs))

I also tried this library:

!pip install git+https://github.com/qubvel/segmentation_models
import segmentation_models as sm

# define optimizer
n_classes = 3
LR = 0.0001
optim = keras.optimizers.Adam(LR)
dice_loss_sm = sm.losses.DiceLoss(class_weights=K.ones_like(n_classes))

However, I get the following error:

TypeError: Input 'y' of 'Mul' Op has type int32 that does not match type float32 of argument 'x'.
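One suspect worth noting (my assumption, since the message points at a Mul op mixing int32 and float32): K.ones_like(n_classes) is applied to the Python int 3, which produces an int32 tensor of ones. Passing explicitly float weights sidesteps that particular mismatch, as in this sketch:

import numpy as np

# float32 class weights avoid feeding int32 ones into the float32 loss graph
dice_loss_sm = sm.losses.DiceLoss(class_weights=np.ones(n_classes, dtype="float32"))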

The rest of the code is the same as on keras.io, but I list it below for completeness:

from tensorflow import keras
from tensorflow.keras import layers


def get_model(img_size, num_classes):
    inputs = keras.Input(shape=img_size + (3,))

    ### [First half of the network: downsampling inputs] ###

    # Entry block
    x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    # Blocks 1, 2, 3 are identical apart from the feature depth.
    for filters in [64, 128, 256]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual
        residual = layers.Conv2D(filters, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    ### [Second half of the network: upsampling inputs] ###

    for filters in [256, 128, 64, 32]:
        x = layers.Activation("relu")(x)
        x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.UpSampling2D(2)(x)

        # Project residual
        residual = layers.UpSampling2D(2)(previous_block_activation)
        residual = layers.Conv2D(filters, 1, padding="same")(residual)
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    # Add a per-pixel classification layer
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

    # Define the model
    model = keras.Model(inputs, outputs)
    return model


# Free up RAM in case the model definition cells were run multiple times
keras.backend.clear_session()

# Build model (img_size and num_classes are defined earlier in the keras.io
# tutorial: img_size = (160, 160), num_classes = 3)
model = get_model(img_size, num_classes)
model.summary()
# Configure the model for training.
# We use the "sparse" version of categorical_crossentropy
# because our target data is integers.

# notice: I changed the loss to the dice loss instead of sparse_categorical_crossentropy
model.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy")

callbacks = [
    keras.callbacks.ModelCheckpoint("oxford_segmentation.h5", save_best_only=True)
]

# Train the model, doing validation at the end of each epoch.
epochs = 15
model.fit(train_gen, epochs=epochs, validation_data=val_gen, callbacks=callbacks)

EDIT

The detailed error message when trying the loss from the segmentation_models library:

The problem is in this code:

backend = kwargs['backend']
"""
Args:
    gt: ground truth 4D keras tensor (B, H, W, C) or (B, C, H, W)
    pr: prediction 4D keras tensor (B, H, W, C) or (B, C, H, W)
    class_weights: 1. or list of class weights, len(weights) = C
    class_indexes: Optional integer or list of integers, classes to consider, if ``None`` all classes are used.
    beta: f-score coefficient
    smooth: value to avoid division by zero
    per_image: if ``True``, metric is calculated as mean over images in batch (B),
        else over whole batch
    threshold: value to round predictions (use ``>`` comparison), if ``None`` prediction will not be round
Returns:
    F-score in range [0, 1]
"""

gt, pr = gather_channels(gt, pr, indexes=class_indexes, **kwargs)
pr = round_if_needed(pr, threshold, **kwargs)
axes = get_reduce_axes(per_image, **kwargs)

# calculate score
tp = backend.sum(gt * pr, axis=axes)  # the issue here
fp = backend.sum(pr, axis=axes) - tp
fn = backend.sum(gt, axis=axes) - tp

score = ((1 + beta ** 2) * tp + smooth) \
        / ((1 + beta ** 2) * tp + beta ** 2 * fn + fp + smooth)
score = average(score, per_image, class_weights, **kwargs)

return score
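The docstring above says gt should be a 4D one-hot tensor (B, H, W, C), while the keras.io tutorial feeds integer class-index masks, so the gt * pr line marked above ends up multiplying int32 by float32. A minimal wrapper sketch, assuming the tutorial's (B, H, W, 1) integer masks and the dice_loss_sm object defined earlier:

import tensorflow as tf

def sparse_dice_loss(y_true, y_pred):
    # y_true: integer masks (B, H, W, 1); y_pred: float32 softmax (B, H, W, n_classes)
    y_true = tf.squeeze(tf.cast(y_true, tf.int32), axis=-1)
    y_true = tf.one_hot(y_true, depth=n_classes)  # (B, H, W, C), float32
    return dice_loss_sm(y_true, y_pred)

model.compile(optimizer=optim, loss=sparse_dice_loss)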

The code for gt, pr, and the axes is here:

def get_reduce_axes(per_image, **kwargs):
    backend = kwargs['backend']
    axes = [1, 2] if backend.image_data_format() == 'channels_last' else [2, 3]
    if not per_image:
        axes.insert(0, 0)
    return axes


def gather_channels(*xs, indexes=None, **kwargs):
    """Slice tensors along channels axis by given indexes"""
    if indexes is None:
        return xs
    elif isinstance(indexes, (int)):
        indexes = [indexes]
    xs = [_gather_channels(x, indexes=indexes, **kwargs) for x in xs]
    return xs


def round_if_needed(x, threshold, **kwargs):
    backend = kwargs['backend']
    if threshold is not None:
        x = backend.greater(x, threshold)
        x = backend.cast(x, backend.floatx())
    return x

You are passing one-dimensional vectors to K.dot, while the ValueError says that K.dot expects rank-2 arrays.

You can replace it with element-wise multiplication, i.e., intersection = K.sum(targets * inputs).
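Putting that together, a corrected sketch of the loss (the cast is my addition, since the tutorial's targets are integer masks while the predictions are float32):

from tensorflow.keras import backend as K

def DiceLoss(targets, inputs, smooth=1e-6):
    # flatten label and prediction tensors
    inputs = K.flatten(inputs)
    targets = K.cast(K.flatten(targets), inputs.dtype)

    # element-wise product instead of K.dot, which requires rank-2 inputs
    intersection = K.sum(targets * inputs)
    dice = (2 * intersection + smooth) / (K.sum(targets) + K.sum(inputs) + smooth)
    return 1 - dice

model.compile(optimizer="rmsprop", loss=DiceLoss)

Note that Keras calls a loss as loss_fn(y_true, y_pred), so with this argument order targets receives y_true and inputs receives y_pred. With sparse integer masks and a multi-class softmax output the flattened shapes will still differ (B·H·W vs. B·H·W·C), so one-hot encoding the targets first, as in the wrapper sketch above, may still be needed.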